[PATCH 6/6] drm/amdgpu: fix fence fallback timer expired error

Christian König ckoenig.leichtzumerken at gmail.com
Wed Apr 16 13:54:39 UTC 2025


Am 14.04.25 um 12:46 schrieb Samuel Zhang:
> IH is not working after switching to a new gpu index for the first time.
> The IH handler function needs to be re-registered with the kernel after
> switching to a new gpu index.

Why?

Christian.

>
> Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
> Change-Id: Idece1c8fce24032fd08f5a8b6ac23793c51e56dd
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c |  7 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 18 ++++++++++++++++--
>  3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 19ce4da285e8..2292245a0c5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -326,7 +326,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
>  	return r;
>  }
>  
> -void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
> +void amdgpu_irq_uninstall(struct amdgpu_device *adev)
>  {
>  	if (adev->irq.installed) {
>  		free_irq(adev->irq.irq, adev_to_drm(adev));
> @@ -334,7 +334,10 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
>  		if (adev->irq.msi_enabled)
>  			pci_free_irq_vectors(adev->pdev);
>  	}
> -
> +}
> +void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
> +{
> +	amdgpu_irq_uninstall(adev);
>  	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
>  	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>  	amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> index 04c0b4fa17a4..c6e6681b4f71 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> @@ -123,6 +123,7 @@ extern const int node_id_to_phys_map[NODEID_MAX];
>  void amdgpu_irq_disable_all(struct amdgpu_device *adev);
>  
>  int amdgpu_irq_init(struct amdgpu_device *adev);
> +void amdgpu_irq_uninstall(struct amdgpu_device *adev);
>  void amdgpu_irq_fini_sw(struct amdgpu_device *adev);
>  void amdgpu_irq_fini_hw(struct amdgpu_device *adev);
>  int amdgpu_irq_add_id(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index faa0dd75dd6d..ef996505e4dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -643,12 +643,26 @@ static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block)
>  
>  static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block)
>  {
> -	return vega20_ih_hw_fini(ip_block);
> +	struct amdgpu_device *adev = ip_block->adev;
> +	int r = 0;
> +
> +	r = vega20_ih_hw_fini(ip_block);
> +	amdgpu_irq_uninstall(adev);
> +	return r;
>  }
>  
>  static int vega20_ih_resume(struct amdgpu_ip_block *ip_block)
>  {
> -	return vega20_ih_hw_init(ip_block);
> +	struct amdgpu_device *adev = ip_block->adev;
> +	int r = 0;
> +
> +	r = amdgpu_irq_init(adev);
> +	if (r) {
> +		dev_err(adev->dev, "amdgpu_irq_init failed in %s, %d\n", __func__, r);
> +		return r;
> +	}
> +	r = vega20_ih_hw_init(ip_block);
> +	return r;
>  }
>  
>  static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block)



More information about the amd-gfx mailing list