[PATCH] drm/amdgpu: Update irq disable flow during unload
Lazar, Lijo
lijo.lazar at amd.com
Mon Jan 8 03:50:08 UTC 2024
On 1/5/2024 8:51 PM, Asad Kamal wrote:
> In certain special cases, e.g., a device reset before module
> unload, the irq gets disabled as part of the reset sequence and
> is not re-enabled. Add a special check to cover such scenarios.
>
> Signed-off-by: Asad Kamal <asad.kamal at amd.com>
> Suggested-by: Lijo Lazar <lijo.lazar at amd.com>
Please also add the tag:
Fixes: f5c7e7797060 ("drm/amdgpu: Adjust removal control flow for smu v13_0_2")
Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
Thanks,
Lijo
> ---
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++++--
> drivers/gpu/drm/amd/amdgpu/soc15.c | 13 +++++++++++--
> 2 files changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 372de9f1ce59..a4e1b9a58679 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -2361,6 +2361,7 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
> static int gmc_v9_0_hw_fini(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + bool irq_release = true;
>
> gmc_v9_0_gart_disable(adev);
>
> @@ -2378,9 +2379,16 @@ static int gmc_v9_0_hw_fini(void *handle)
> if (adev->mmhub.funcs->update_power_gating)
> adev->mmhub.funcs->update_power_gating(adev, false);
>
> - amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
> + if (adev->shutdown)
> + irq_release = amdgpu_irq_enabled(adev, &adev->gmc.vm_fault, 0);
>
> - if (adev->gmc.ecc_irq.funcs &&
> + if (irq_release)
> + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
> +
> + if (adev->shutdown)
> + irq_release = amdgpu_irq_enabled(adev, &adev->gmc.ecc_irq, 0);
> +
> + if (adev->gmc.ecc_irq.funcs && irq_release &&
> amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
> amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 15033efec2ba..7ee835049d57 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -1266,6 +1266,7 @@ static int soc15_common_hw_init(void *handle)
> static int soc15_common_hw_fini(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + bool irq_release = true;
>
> /* Disable the doorbell aperture and selfring doorbell aperture
> * separately in hw_fini because soc15_enable_doorbell_aperture
> @@ -1280,10 +1281,18 @@ static int soc15_common_hw_fini(void *handle)
>
> if (adev->nbio.ras_if &&
> amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
> - if (adev->nbio.ras &&
> + if (adev->shutdown)
> + irq_release = amdgpu_irq_enabled(adev, &adev->nbio.ras_controller_irq, 0);
> +
> + if (adev->nbio.ras && irq_release &&
> adev->nbio.ras->init_ras_controller_interrupt)
> amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
> - if (adev->nbio.ras &&
> +
> + if (adev->shutdown)
> + irq_release = amdgpu_irq_enabled(adev,
> + &adev->nbio.ras_err_event_athub_irq, 0);
> +
> + if (adev->nbio.ras && irq_release &&
> adev->nbio.ras->init_ras_err_event_athub_interrupt)
> amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
> }
More information about the amd-gfx
mailing list