[PATCH] drm/amdgpu: fix gfx hang during suspend with video playback

Liang, Prike Prike.Liang at amd.com
Wed Apr 8 02:27:37 UTC 2020



> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling at amd.com>
> Sent: Tuesday, April 7, 2020 11:43 PM
> To: Liang, Prike <Prike.Liang at amd.com>; amd-gfx at lists.freedesktop.org;
> Huang, Ray <Ray.Huang at amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Quan, Evan
> <Evan.Quan at amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix gfx hang during suspend with video
> playback
> 
> Sorry, I missed this email thread because the subject seemed irrelevant to
> me. I still don't get why this is causing a problem with suspend/resume with
> video playback.
> 
> The functions you're changing are mostly used when running without HWS.
> This should only be the case during bring-ups or while debugging HWS issues.
> Otherwise they're only used for setting up the HIQ. That means in normal
> operation, these functions should not be used for user mode queue mapping,
> which is handled by the HWS.
[Prike] This issue is caused by improperly accessing the register CP_HQD_ACTIVE
while GFX has entered CGPG, when the MQD is destroyed during the amdkfd suspend stage.

This solution may add excessive guards around some MQD setup and occupancy checks.
It is likely a potential common issue, so I have drafted a v2 patch that disables
GFX CGPG directly before performing the amdgpu suspend operation.

Thanks,
Prike

> Ray, I vaguely remember we discussed using KIQ for mapping the HIQ at
> some point. Did anyone ever propose a patch for that?
> 
> Thanks,
>   Felix
> 
> Am 2020-04-03 um 12:07 a.m. schrieb Prike Liang:
> > The system will hang during S3 because the SMU is left pending when GC
> > does not respond to the register CP_HQD_ACTIVE access request. This issue
> > can be fixed by adding an RLC safe mode guard before each HQD map/unmap
> > retrieve operation.
> >
> > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > Tested-by: Mengbing Wang <Mengbing.Wang at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++++++
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 4 ++++
> >  2 files changed, 10 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > index df841c2..e265063 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> > @@ -232,6 +232,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void
> *mqd, uint32_t pipe_id,
> >  	uint32_t *mqd_hqd;
> >  	uint32_t reg, hqd_base, data;
> >
> > +	amdgpu_gfx_rlc_enter_safe_mode(adev);
> >  	m = get_mqd(mqd);
> >
> >  	acquire_queue(kgd, pipe_id, queue_id); @@ -299,6 +300,7 @@ int
> > kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> >
> >  	release_queue(kgd);
> >
> > +	amdgpu_gfx_rlc_exit_safe_mode(adev);
> >  	return 0;
> >  }
> >
> > @@ -497,6 +499,7 @@ bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev
> *kgd, uint64_t queue_address,
> >  	bool retval = false;
> >  	uint32_t low, high;
> >
> > +	amdgpu_gfx_rlc_enter_safe_mode(adev);
> >  	acquire_queue(kgd, pipe_id, queue_id);
> >  	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
> >  	if (act) {
> > @@ -508,6 +511,7 @@ bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev
> *kgd, uint64_t queue_address,
> >  			retval = true;
> >  	}
> >  	release_queue(kgd);
> > +	amdgpu_gfx_rlc_exit_safe_mode(adev);
> >  	return retval;
> >  }
> >
> > @@ -541,6 +545,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd,
> void *mqd,
> >  	uint32_t temp;
> >  	struct v9_mqd *m = get_mqd(mqd);
> >
> > +	amdgpu_gfx_rlc_enter_safe_mode(adev);
> >  	if (adev->in_gpu_reset)
> >  		return -EIO;
> >
> > @@ -577,6 +582,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd,
> void *mqd,
> >  	}
> >
> >  	release_queue(kgd);
> > +	amdgpu_gfx_rlc_exit_safe_mode(adev);
> >  	return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 1fea077..ee107d9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -3533,6 +3533,7 @@ static int gfx_v9_0_kiq_init_register(struct
> amdgpu_ring *ring)
> >  	struct v9_mqd *mqd = ring->mqd_ptr;
> >  	int j;
> >
> > +	amdgpu_gfx_rlc_enter_safe_mode(adev);
> >  	/* disable wptr polling */
> >  	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
> >
> > @@ -3629,6 +3630,7 @@ static int gfx_v9_0_kiq_init_register(struct
> amdgpu_ring *ring)
> >  	if (ring->use_doorbell)
> >  		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE,
> 1);
> >
> > +	amdgpu_gfx_rlc_exit_safe_mode(adev);
> >  	return 0;
> >  }
> >
> > @@ -3637,6 +3639,7 @@ static int gfx_v9_0_kiq_fini_register(struct
> amdgpu_ring *ring)
> >  	struct amdgpu_device *adev = ring->adev;
> >  	int j;
> >
> > +	amdgpu_gfx_rlc_enter_safe_mode(adev);
> >  	/* disable the queue if it's active */
> >  	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
> >
> > @@ -3668,6 +3671,7 @@ static int gfx_v9_0_kiq_fini_register(struct
> amdgpu_ring *ring)
> >  	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
> >  	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
> >
> > +	amdgpu_gfx_rlc_exit_safe_mode(adev);
> >  	return 0;
> >  }
> >


More information about the amd-gfx mailing list