[PATCH v2] drm/amdgpu: Add check to prevenet IH overflow

Christian König christian.koenig at amd.com
Tue Jan 5 16:49:23 UTC 2021


Am 05.01.21 um 17:06 schrieb Defang Bo:
> Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2").
> When an ring buffer overflow happens the appropriate bit is set in the WPTR
> register which is also written back to memory. But clearing the bit in the
> WPTR doesn't trigger another memory writeback.
>
> So what can happen is that we end up processing the buffer overflow over and
> over again because the bit is never cleared. Resulting in a random system
> lockup because of an infinite loop in an interrupt handler.
>
> Signed-off-by: Defang Bo <bodefang at 126.com>

Reviewed-by: Christian König <christian.koenig at amd.com>

Thanks for the help,
Christian.

> ---
> Changes since v1:
> - Modify the subject and replace the wrong register.
> ---
> ---
>   drivers/gpu/drm/amd/amdgpu/cz_ih.c      | 39 +++++++++++++++++++++------------
>   drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 36 +++++++++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/tonga_ih.c   | 37 ++++++++++++++++++++-----------
>   3 files changed, 72 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> index 1dca0cabc326..65361afb21ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
> @@ -190,22 +190,33 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
>   			  struct amdgpu_ih_ring *ih)
>   {
>   	u32 wptr, tmp;
> -
> +
>   	wptr = le32_to_cpu(*ih->wptr_cpu);
>   
> -	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
> -		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> -		/* When a ring buffer overflow happen start parsing interrupt
> -		 * from the last not overwritten vector (wptr + 16). Hopefully
> -		 * this should allow us to catchup.
> -		 */
> -		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> -			wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> -		ih->rptr = (wptr + 16) & ih->ptr_mask;
> -		tmp = RREG32(mmIH_RB_CNTL);
> -		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> -		WREG32(mmIH_RB_CNTL, tmp);
> -	}
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	/* Double check that the overflow wasn't already cleared. */
> +	wptr = RREG32(mmIH_RB_WPTR);
> +
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +
> +	/* When a ring buffer overflow happen start parsing interrupt
> +	 * from the last not overwritten vector (wptr + 16). Hopefully
> +	 * this should allow us to catchup.
> +	 */
> +	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> +		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> +	ih->rptr = (wptr + 16) & ih->ptr_mask;
> +	tmp = RREG32(mmIH_RB_CNTL);
> +	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> +	WREG32(mmIH_RB_CNTL, tmp);
> +
> +
> +out:
>   	return (wptr & ih->ptr_mask);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
> index a13dd9a51149..8e4dae8addb9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
> @@ -193,19 +193,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
>   
>   	wptr = le32_to_cpu(*ih->wptr_cpu);
>   
> -	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
> -		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> -		/* When a ring buffer overflow happen start parsing interrupt
> -		 * from the last not overwritten vector (wptr + 16). Hopefully
> -		 * this should allow us to catchup.
> -		 */
> -		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> -			 wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> -		ih->rptr = (wptr + 16) & ih->ptr_mask;
> -		tmp = RREG32(mmIH_RB_CNTL);
> -		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> -		WREG32(mmIH_RB_CNTL, tmp);
> -	}
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	/* Double check that the overflow wasn't already cleared. */
> +	wptr = RREG32(mmIH_RB_WPTR);
> +
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +	/* When a ring buffer overflow happen start parsing interrupt
> +	 * from the last not overwritten vector (wptr + 16). Hopefully
> +	 * this should allow us to catchup.
> +	 */
> +	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> +		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> +	ih->rptr = (wptr + 16) & ih->ptr_mask;
> +	tmp = RREG32(mmIH_RB_CNTL);
> +	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> +	WREG32(mmIH_RB_CNTL, tmp);
> +
> +
> +out:
>   	return (wptr & ih->ptr_mask);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
> index e40140bf6699..2ba1ce323b6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
> @@ -195,19 +195,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
>   
>   	wptr = le32_to_cpu(*ih->wptr_cpu);
>   
> -	if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
> -		wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> -		/* When a ring buffer overflow happen start parsing interrupt
> -		 * from the last not overwritten vector (wptr + 16). Hopefully
> -		 * this should allow us to catchup.
> -		 */
> -		dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> -			 wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> -		ih->rptr = (wptr + 16) & ih->ptr_mask;
> -		tmp = RREG32(mmIH_RB_CNTL);
> -		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> -		WREG32(mmIH_RB_CNTL, tmp);
> -	}
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	/* Double check that the overflow wasn't already cleared. */
> +	wptr = RREG32(mmIH_RB_WPTR);
> +
> +	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> +		goto out;
> +
> +	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> +
> +	/* When a ring buffer overflow happen start parsing interrupt
> +	 * from the last not overwritten vector (wptr + 16). Hopefully
> +	 * this should allow us to catchup.
> +	 */
> +
> +	dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
> +		wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
> +	ih->rptr = (wptr + 16) & ih->ptr_mask;
> +	tmp = RREG32(mmIH_RB_CNTL);
> +	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
> +	WREG32(mmIH_RB_CNTL, tmp);
> +
> +out:
>   	return (wptr & ih->ptr_mask);
>   }
>   



More information about the amd-gfx mailing list