[PATCH 1/4] drm/amdgpu: Reset the devices in the XGMI hive duirng probe

Tue Feb 23 17:54:39 UTC 2021

[AMD Official Use Only - Internal Distribution Only]

Comments inline , 

-----Original Message-----
From: Alex Deucher <alexdeucher at gmail.com> 
Sent: Tuesday, February 23, 2021 11:47 AM
To: Liu, Shaoyun <Shaoyun.Liu at amd.com>
Cc: amd-gfx list <amd-gfx at lists.freedesktop.org>
Subject: Re: [PATCH 1/4] drm/amdgpu: Reset the devices in the XGMI hive duirng probe

On Thu, Feb 18, 2021 at 8:19 PM shaoyunl <shaoyun.liu at amd.com> wrote:
>
> In passthrough configuration, hypervisior will trigger the 
> SBR(Secondary bus reset) to the devices without sync to each other. 
> This could cause device hang since for XGMI configuration, all the 
> devices within the hive need to be reset at a limit time slot. This 
> serial of patches try to solve this issue by co-operate with new SMU 
> which will only do minimum house keeping to response the SBR request 
> but don't do the real reset job and leave it to driver. Driver need to do the whole sw init and minimum HW init to bring up the SMU and trigger the reset(possibly BACO) on all the ASICs at the same time with existing gpu_recovery routine.
>
> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
> Change-Id: I34e838e611b7623c7ad824704c7ce350808014fc
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 96 +++++++++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c    |  6 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  6 +-
>  4 files changed, 87 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2f9ad7ed82be..9f574fd151bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1220,6 +1220,10 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
>                 }
>         }
>
> +       /* Don't post if we need to reset whole hive on init */
> +       if (adev->gmc.xgmi.pending_reset)
> +               return false;
> +
>         if (adev->has_hw_reset) {
>                 adev->has_hw_reset = false;
>                 return true;
> @@ -2147,6 +2151,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
>                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
>                                 continue;
>
> +                       if (!adev->ip_blocks[i].status.sw)
> +                               continue;
> +
>                         /* no need to do the fw loading again if already done*/
>                         if (adev->ip_blocks[i].status.hw == true)
>                                 break; @@ -2287,7 +2294,10 @@ static 
> int amdgpu_device_ip_init(struct amdgpu_device *adev)
>
>         if (adev->gmc.xgmi.num_physical_nodes > 1)
>                 amdgpu_xgmi_add_device(adev);
> -       amdgpu_amdkfd_device_init(adev);
> +
> +       /* Don't init kfd if whole hive need to be reset during init */
> +       if (!adev->gmc.xgmi.pending_reset)
> +               amdgpu_amdkfd_device_init(adev);
>
>         amdgpu_fru_get_product_info(adev);
>
> @@ -2731,6 +2741,16 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
>                         adev->ip_blocks[i].status.hw = false;
>                         continue;
>                 }
> +
> +               /* skip unnecessary suspend if we do not initialize them yet */
> +               if (adev->gmc.xgmi.pending_reset &&
> +                   !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> +                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
> +                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> +                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
> +                       adev->ip_blocks[i].status.hw = false;
> +                       continue;
> +               }
>                 /* XXX handle errors */
>                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
>                 /* XXX handle errors */ @@ -3402,10 +3422,29 @@ int 
> amdgpu_device_init(struct amdgpu_device *adev,
>          *  E.g., driver was not cleanly unloaded previously, etc.
>          */
>         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
> -               r = amdgpu_asic_reset(adev);
> -               if (r) {
> -                       dev_err(adev->dev, "asic reset on init failed\n");
> -                       goto failed;
> +               if (adev->gmc.xgmi.num_physical_nodes) {
> +                       dev_info(adev->dev, "Pending hive reset.\n");
> +                       adev->gmc.xgmi.pending_reset = true;
> +                       /* Only need to init necessary block for SMU to handle the reset */
> +                       for (i = 0; i < adev->num_ip_blocks; i++) {
> +                               if (!adev->ip_blocks[i].status.valid)
> +                                       continue;
> +                               if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> +                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
> +                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> +                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
> +                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
> +                                       DRM_DEBUG("IP %s disabed for hw_init.\n",
> +                                               adev->ip_blocks[i].version->funcs->name);
> +                                       adev->ip_blocks[i].status.hw = true;
> +                               }
> +                       }
> +               } else {
> +                       r = amdgpu_asic_reset(adev);
> +                       if (r) {
> +                               dev_err(adev->dev, "asic reset on init failed\n");
> +                               goto failed;
> +                       }
>                 }
>         }
>
> @@ -3536,19 +3575,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>         /* enable clockgating, etc. after ib tests, etc. since some blocks require
>          * explicit gating rather than handling it automatically.
>          */
> -       r = amdgpu_device_ip_late_init(adev);
> -       if (r) {
> -               dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
> -               amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
> -               goto failed;
> +       if (!adev->gmc.xgmi.pending_reset) {
> +               r = amdgpu_device_ip_late_init(adev);
> +               if (r) {
> +                       dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
> +                       amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
> +                       goto failed;
> +               }
> +               /* must succeed. */
> +               amdgpu_ras_resume(adev);
> +               queue_delayed_work(system_wq, &adev->delayed_init_work,
> +                                  
> + msecs_to_jiffies(AMDGPU_RESUME_MS));
>         }
>
> -       /* must succeed. */
> -       amdgpu_ras_resume(adev);
> -
> -       queue_delayed_work(system_wq, &adev->delayed_init_work,
> -                          msecs_to_jiffies(AMDGPU_RESUME_MS));
> -
>         if (amdgpu_sriov_vf(adev))
>                 flush_delayed_work(&adev->delayed_init_work);
>
> @@ -3565,6 +3604,18 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>         if (amdgpu_device_cache_pci_state(adev->pdev))
>                 pci_restore_state(pdev);
>
> +       if (adev->gmc.xgmi.pending_reset) {
> +               struct amdgpu_hive_info *hive = 
> + amdgpu_get_xgmi_hive(adev);
> +
> +               if (atomic_read(&hive->number_devices) ==
> +                   adev->gmc.xgmi.num_physical_nodes) {
> +                       /* Trigger hive reset when all gpus within the hive is ready */
> +                       dev_info(adev->dev, "Trigger XGMI reset during init.\n");
> +                       amdgpu_device_gpu_recover(adev, NULL);
> +               }
> +               amdgpu_put_xgmi_hive(hive);
> +       }
> +
>         return 0;
>
>  failed:
> @@ -4241,7 +4292,9 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>         int i, r = 0;
>         bool need_full_reset  = *need_full_reset_arg;
>
> -       amdgpu_debugfs_wait_dump(adev);
> +       /* no need to dump if device is not in good state during probe period */
> +       if (!adev->gmc.xgmi.pending_reset)
> +               amdgpu_debugfs_wait_dump(adev);
>
>         if (amdgpu_sriov_vf(adev)) {
>                 /* stop the data exchange thread */ @@ -4304,6 +4357,7 
> @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>                 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>                         /* For XGMI run all resets in parallel to speed up the process */
>                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) 
> {
> +                               tmp_adev->gmc.xgmi.pending_reset = 
> + false;
>                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
>                                         r = -EALREADY;
>                         } else
> @@ -4343,10 +4397,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>                 if (need_full_reset) {
>                         /* post card */
> -                       if (amdgpu_device_asic_init(tmp_adev))
> +                       r = amdgpu_device_asic_init(tmp_adev);
> +                       if (r) {
>                                 dev_warn(tmp_adev->dev, "asic atom 
> init failed!");
> -
> -                       if (!r) {
> +                       } else {
>                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
>                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
>                                 if (r) @@ -4790,6 +4844,8 @@ int 
> amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>                 if (audio_suspended)
>                         amdgpu_device_resume_display_audio(tmp_adev);
>                 amdgpu_device_unlock_adev(tmp_adev);
> +               /*enable buffer function after reset */

Space between * and comment, e.g., /* Enable

> +               amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);

Will this cause issues with any of the other flows through this code?

[Shaoyunl] it won't , the function internally will check whether it's in reset state . so for normal gpu recovery called on runtime , this won't have side effect. 

>         }
>
>  skip_recovery:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index aa0c83776ce0..8c71d84a2fbe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -149,6 +149,7 @@ struct amdgpu_xgmi {
>         struct list_head head;
>         bool supported;
>         struct ras_common_if *ras_if;
> +       bool pending_reset;
>  };
>
>  struct amdgpu_gmc {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 839917eb7bc3..5e127a59b121 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -2261,6 +2261,12 @@ static int psp_load_fw(struct amdgpu_device *adev)
>                 goto failed;
>         }
>
> +       /*Don't do the real PSP HW init if we are pending on XGMI reset.
> +        *The above init probably should move to psp_sw_init
> +        */

Space between * and comment.  E.g., /* Don't

This whole thing seems kind of fragile.  Is there some way we can restructure the init code to provide two code high level code paths?
One for regular init and one for this case?  I feel like this will be prone to breakage if this code sees any changes.

Alex

[shaoyunl]  Agree, current psp code mix the SW init code in the HW init function,  to fix that will  involved some  changes not directly relative to this XGMI  reset and  other more tests . I would like to separate that from this serial change . 

Thanks 
Shaoyun.liu

> +       if (adev->gmc.xgmi.pending_reset)
> +               return 0;
> +
>  skip_memalloc:
>         ret = psp_hw_start(psp);
>         if (ret)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 659b385b27b5..b1c8fd90c1b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -492,7 +492,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>         if (!adev->gmc.xgmi.supported)
>                 return 0;
>
> -       if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
> +       if (!adev->gmc.xgmi.pending_reset &&
> +           amdgpu_device_ip_get_ip_block(adev, 
> + AMD_IP_BLOCK_TYPE_PSP)) {
>                 ret = psp_xgmi_initialize(&adev->psp);
>                 if (ret) {
>                         dev_err(adev->dev, @@ -538,7 +539,8 @@ int 
> amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>
>         task_barrier_add_task(&hive->tb);
>
> -       if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
> +       if (!adev->gmc.xgmi.pending_reset &&
> +           amdgpu_device_ip_get_ip_block(adev, 
> + AMD_IP_BLOCK_TYPE_PSP)) {
>                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
>                         /* update node list for other device in the hive */
>                         if (tmp_adev != adev) {
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Csh
> aoyun.liu%40amd.com%7C0641a47202924dde84c308d8d81aaaf3%7C3dd8961fe4884
> e608e11a82d994e183d%7C0%7C0%7C637496956338383230%7CUnknown%7CTWFpbGZsb
> 3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%
> 7C1000&sdata=Yvp3D7pFpvK%2BRP4StG%2BTH8O0UlWYsbPJ54MzdyKbFGQ%3D&am
> p;reserved=0