[PATCH v2 2/3] drm/amdgpu: avoid sending csib command when system resumes from S3

Thu Oct 26 08:37:28 UTC 2023

[AMD Official Use Only - General]

Thanks a lot for the review!

Regards.
Perry

> -----Original Message-----
> From: Zhang, Yifan <Yifan1.Zhang at amd.com>
> Sent: Thursday, October 26, 2023 3:31 PM
> To: Yuan, Perry <Perry.Yuan at amd.com>; Feng, Kenneth
> <Kenneth.Feng at amd.com>; Limonciello, Mario
> <Mario.Limonciello at amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: RE: [PATCH v2 2/3] drm/amdgpu: avoid sending csib command when
> system resumes from S3
>
> [AMD Official Use Only - General]
>
> This patch is:
>
> Reviewed-by: Yifan Zhang <yifan1.zhang at amd.com>
>
> Best Regards,
> Yifan
>
> -----Original Message-----
> From: Yuan, Perry <Perry.Yuan at amd.com>
> Sent: Tuesday, October 24, 2023 10:50 PM
> To: Zhang, Yifan <Yifan1.Zhang at amd.com>; Feng, Kenneth
> <Kenneth.Feng at amd.com>; Limonciello, Mario
> <Mario.Limonciello at amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: [PATCH v2 2/3] drm/amdgpu: avoid sending csib command when
> system resumes from S3
>
> Previously the CSIB command packet was always sent to the GFX block, both at
> amdgpu driver load time and at S3 resume time.
> As the CP protocol requires, the CSIB does not need to be sent again if the GC
> was not powered down, i.e. when resuming from an aborted S3 suspend sequence.
>
> When a PREAMBLE_CNTL packet arrives in the ring after a PG event where the RLC
> has already sent its copy of the CSIB, sending another CSIB packet causes a Gfx IB
> test timeout when the system resumes from S3.
>
> Add a flag `csib_initialized` to make sure a normal S3 suspend/resume still
> initializes the CSIB as usual. When the system aborts the S3 suspend and resumes
> immediately because some suspend callback failed, the GPU is not powered down at
> that time, so the CSIB command does not need to be sent again.
>
> Error dmesg log:
> amdgpu 0000:04:00.0: [drm:amdgpu_ib_ring_tests [amdgpu]] *ERROR* IB
> test failed on gfx_0.0.0 (-110).
> [drm:amdgpu_device_delayed_init_work_handler [amdgpu]] *ERROR* ib ring
> test failed (-110).
> PM: resume of devices complete after 2373.995 msecs
> PM: Finishing wakeup.
>
> v1-v2:
>  * align csib_initialized with the other fields using spaces [Kevin]
>  * check the return value of gfx_v10_0_wait_for_idle() [Kevin]
>
> Signed-off-by: Perry Yuan <perry.yuan at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  5 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 32 ++++++++++++++++++----
> ---
>  3 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 44df1a5bce7f..7d570f53e66b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1114,6 +1114,7 @@ struct amdgpu_device {
>         bool                            debug_vm;
>         bool                            debug_largebar;
>         bool                            debug_disable_soft_recovery;
> +       bool                            csib_initialized;
>  };
>
>  static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 420196a17e22..a47c9f840754 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2468,6 +2468,11 @@ static int amdgpu_pmops_suspend_noirq(struct
> device *dev)
>         if (amdgpu_acpi_should_gpu_reset(adev))
>                 return amdgpu_asic_reset(adev);
>
> +       /* update flag to make sure csib will be sent when system
> +        * resume from normal S3
> +        */
> +       adev->csib_initialized = false;
> +
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 6399bc71c56d..138340e1fa95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -3481,6 +3481,7 @@ static uint64_t
> gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);  static void
> gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
>                                    u32 sh_num, u32 instance, int xcc_id);  static u32
> gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
> +static int gfx_v10_0_wait_for_idle(void *handle);
>
>  static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct
> amdgpu_device *adev);  static void
> gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
> @@ -5958,7 +5959,7 @@ static int gfx_v10_0_cp_gfx_load_microcode(struct
> amdgpu_device *adev)
>         return 0;
>  }
>
> -static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
> +static int gfx_v10_csib_submit(struct amdgpu_device *adev)
>  {
>         struct amdgpu_ring *ring;
>         const struct cs_section_def *sect = NULL; @@ -5966,13 +5967,6 @@
> static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
>         int r, i;
>         int ctx_reg_offset;
>
> -       /* init the CP */
> -       WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
> -                    adev->gfx.config.max_hw_contexts - 1);
> -       WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
> -
> -       gfx_v10_0_cp_gfx_enable(adev, true);
> -
>         ring = &adev->gfx.gfx_ring[0];
>         r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
>         if (r) {
> @@ -6035,6 +6029,28 @@ static int gfx_v10_0_cp_gfx_start(struct
> amdgpu_device *adev)
>
>                 amdgpu_ring_commit(ring);
>         }
> +
> +       r = gfx_v10_0_wait_for_idle(adev);
> +       if (r)
> +               return r;
> +
> +       adev->csib_initialized = true;
> +
> +       return 0;
> +};
> +
> +static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) {
> +       /* init the CP */
> +       WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
> +                    adev->gfx.config.max_hw_contexts - 1);
> +       WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
> +
> +       gfx_v10_0_cp_gfx_enable(adev, true);
> +
> +       if (!adev->csib_initialized)
> +               gfx_v10_csib_submit(adev);
> +
>         return 0;
>  }
>
> --
> 2.34.1
>