[PATCH v2 2/3] drm/amdgpu: avoid sending csib command when system resumes from S3
Yuan, Perry
Perry.Yuan at amd.com
Thu Oct 26 08:37:28 UTC 2023
[AMD Official Use Only - General]
Thanks a lot for the review!
Regards.
Perry
> -----Original Message-----
> From: Zhang, Yifan <Yifan1.Zhang at amd.com>
> Sent: Thursday, October 26, 2023 3:31 PM
> To: Yuan, Perry <Perry.Yuan at amd.com>; Feng, Kenneth
> <Kenneth.Feng at amd.com>; Limonciello, Mario
> <Mario.Limonciello at amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: RE: [PATCH v2 2/3] drm/amdgpu: avoid sending csib command when
> system resumes from S3
>
> [AMD Official Use Only - General]
>
> This patch is:
>
> Reviewed-by: Yifan Zhang <yifan1.zhang at amd.com>
>
> Best Regards,
> Yifan
>
> -----Original Message-----
> From: Yuan, Perry <Perry.Yuan at amd.com>
> Sent: Tuesday, October 24, 2023 10:50 PM
> To: Zhang, Yifan <Yifan1.Zhang at amd.com>; Feng, Kenneth
> <Kenneth.Feng at amd.com>; Limonciello, Mario
> <Mario.Limonciello at amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Wang, Yang(Kevin)
> <KevinYang.Wang at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: [PATCH v2 2/3] drm/amdgpu: avoid sending csib command when
> system resumes from S3
>
> Previously the CSIB command pocket was sent to GFX block while amdgpu
> driver loading or S3 resuming time all the time.
> As the CP protocol required, the CSIB is not needed to send again while GC is
> not powered down while resuming from aborted S3 suspend sequence.
>
> PREAMBLE_CNTL packet coming in the ring after PG event where the RLC
> already sent its copy of CSIB, send another CSIB pocket will cause Gfx IB testing
> timeout when system resume from S3.
>
> Add flag `csib_initialized` to make sure normal S3 suspend/resume will initialize
> csib normally, when system abort to S3 suspend and resume immediately
> because of some failed suspend callback, GPU is not power down at that time,
> so csib command is not needed to send again.
>
> Error dmesg log:
> amdgpu 0000:04:00.0: [drm:amdgpu_ib_ring_tests [amdgpu]] *ERROR* IB
> test failed on gfx_0.0.0 (-110).
> [drm:amdgpu_device_delayed_init_work_handler [amdgpu]] *ERROR* ib ring
> test failed (-110).
> PM: resume of devices complete after 2373.995 msecs
> PM: Finishing wakeup.
>
> v1-v2:
> * align csib_initialized with other failed with space[Kevin]
> * check return value check for gfx_v10_0_wait_for_idle()[Kevin]
>
> Signed-off-by: Perry Yuan <perry.yuan at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 32 ++++++++++++++++++----
> ---
> 3 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 44df1a5bce7f..7d570f53e66b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1114,6 +1114,7 @@ struct amdgpu_device {
> bool debug_vm;
> bool debug_largebar;
> bool debug_disable_soft_recovery;
> + bool csib_initialized;
> };
>
> static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 420196a17e22..a47c9f840754 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2468,6 +2468,11 @@ static int amdgpu_pmops_suspend_noirq(struct
> device *dev)
> if (amdgpu_acpi_should_gpu_reset(adev))
> return amdgpu_asic_reset(adev);
>
> + /* update flag to make sure csib will be sent when system
> + * resume from normal S3
> + */
> + adev->csib_initialized = false;
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 6399bc71c56d..138340e1fa95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -3481,6 +3481,7 @@ static uint64_t
> gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void
> gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
> u32 sh_num, u32 instance, int xcc_id); static u32
> gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
> +static int gfx_v10_0_wait_for_idle(void *handle);
>
> static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct
> amdgpu_device *adev); static void
> gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
> @@ -5958,7 +5959,7 @@ static int gfx_v10_0_cp_gfx_load_microcode(struct
> amdgpu_device *adev)
> return 0;
> }
>
> -static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
> +static int gfx_v10_csib_submit(struct amdgpu_device *adev)
> {
> struct amdgpu_ring *ring;
> const struct cs_section_def *sect = NULL; @@ -5966,13 +5967,6 @@
> static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
> int r, i;
> int ctx_reg_offset;
>
> - /* init the CP */
> - WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
> - adev->gfx.config.max_hw_contexts - 1);
> - WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
> -
> - gfx_v10_0_cp_gfx_enable(adev, true);
> -
> ring = &adev->gfx.gfx_ring[0];
> r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
> if (r) {
> @@ -6035,6 +6029,28 @@ static int gfx_v10_0_cp_gfx_start(struct
> amdgpu_device *adev)
>
> amdgpu_ring_commit(ring);
> }
> +
> + r = gfx_v10_0_wait_for_idle(adev);
> + if (r)
> + return r;
> +
> + adev->csib_initialized = true;
> +
> + return 0;
> +};
> +
> +static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) {
> + /* init the CP */
> + WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
> + adev->gfx.config.max_hw_contexts - 1);
> + WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
> +
> + gfx_v10_0_cp_gfx_enable(adev, true);
> +
> + if (!adev->csib_initialized)
> + gfx_v10_csib_submit(adev);
> +
> return 0;
> }
>
> --
> 2.34.1
>
More information about the amd-gfx
mailing list