[PATCH] drm/amdgpu: Clear overflow for SRIOV
Alex Deucher
alexdeucher at gmail.com
Fri Apr 11 13:11:18 UTC 2025
On Fri, Apr 11, 2025 at 4:07 AM Emily Deng <Emily.Deng at amd.com> wrote:
>
> A VF does not have permission to clear the IH ring buffer overflow bit
> itself, so clear the bit by triggering a GPU reset instead.
>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 10 ++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 1 +
> drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 +++++-
> drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 +++++-
> 4 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index 901f8b12c672..1c8a40d579c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -227,13 +227,19 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> ih->rptr &= ih->ptr_mask;
> }
>
> - amdgpu_ih_set_rptr(adev, ih);
> + if (!ih->overflow)
> + amdgpu_ih_set_rptr(adev, ih);
> +
> wake_up_all(&ih->wait_process);
>
> /* make sure wptr hasn't changed while processing */
> wptr = amdgpu_ih_get_wptr(adev, ih);
> if (wptr != ih->rptr)
> - goto restart_ih;
> + if (!ih->overflow)
> + goto restart_ih;
> +
> + if (ih->overflow)
> + amdgpu_amdkfd_gpu_reset(adev);
This won't work if KFD is not compiled in your kernel config.
Alex
>
> return IRQ_HANDLED;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 7d4395a5d8ac..ff76f02d3e96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -72,6 +72,7 @@ struct amdgpu_ih_ring {
> /* For waiting on IH processing at checkpoint. */
> wait_queue_head_t wait_process;
> uint64_t processed_timestamp;
> + bool overflow;
> };
>
> /* return true if time stamp t2 is after t1 with 48bit wrap around */
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index f8a485164437..8d3ae88b96a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
> if (ret)
> return ret;
> }
> + ih[i]->overflow = false;
> }
>
> /* update doorbell range for ih ring 0 */
> @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
> wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
> if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> goto out;
> - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> + if (!amdgpu_sriov_vf(adev))
> + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> + else
> + ih->overflow = true;
>
> /* When a ring buffer overflow happen start parsing interrupt
> * from the last not overwritten vector (wptr + 32). Hopefully
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index e9e3b2ed4b7b..2ad209406d17 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
> if (ret)
> return ret;
> }
> + ih[i]->overflow = false;
> }
>
> if (!amdgpu_sriov_vf(adev))
> @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
> if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
> goto out;
>
> - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> + if (!amdgpu_sriov_vf(adev))
> + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
> + else
> + ih->overflow = true;
>
> /* When a ring buffer overflow happen start parsing interrupt
> * from the last not overwritten vector (wptr + 32). Hopefully
> --
> 2.34.1
>
More information about the amd-gfx mailing list