[RFC v2 03/15] drm/amdgpu: free all resources on error recovery path of amdgpu_ras_init()
Lazar, Lijo
lijo.lazar at amd.com
Fri Jan 17 05:39:12 UTC 2025
On 1/13/2025 7:12 AM, Jiang Liu wrote:
> Free all allocated resources on error recovery path in function
> amdgpu_ras_init().
>
> Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 ++++++++++++++-----
> 1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index c10ea3fd3e16..6b508a9b1abe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -3864,6 +3864,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
> int amdgpu_ras_init(struct amdgpu_device *adev)
> {
> struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> + struct amdgpu_ras_block_list *ras_node, *tmp;
> int r;
>
> if (con)
> @@ -3953,20 +3954,20 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
> * to handle fatal error */
> r = amdgpu_nbio_ras_sw_init(adev);
> if (r)
> - return r;
> + goto release_con;
>
> if (adev->nbio.ras &&
> adev->nbio.ras->init_ras_controller_interrupt) {
> r = adev->nbio.ras->init_ras_controller_interrupt(adev);
> if (r)
> - goto release_con;
> + goto free_blocks;
> }
>
> if (adev->nbio.ras &&
> adev->nbio.ras->init_ras_err_event_athub_interrupt) {
> r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
> if (r)
> - goto release_con;
> + goto free_blocks;
> }
>
> /* Packed socket_id to ras feature mask bits[31:29] */
> @@ -3982,7 +3983,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>
> if (amdgpu_ras_fs_init(adev)) {
> r = -EINVAL;
> - goto release_con;
> + goto free_blocks;
> }
>
> if (amdgpu_ras_aca_is_supported(adev)) {
> @@ -3991,7 +3992,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
> else
> r = amdgpu_mca_init(adev);
> if (r)
> - goto release_con;
> + goto clear_ras_fs;
> }
>
> dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
> @@ -3999,6 +4000,14 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
> adev->ras_hw_enabled, adev->ras_enabled);
>
> return 0;
> +
> +clear_ras_fs:
> + amdgpu_ras_fs_fini(adev);
> +free_blocks:
> + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
> + list_del(&ras_node->node);
> + kfree(ras_node);
I suggest adding an amdgpu_nbio_ras_sw_fini() helper that calls something like
amdgpu_ras_unregister_ras_block(), instead of open-coding the list teardown here.
Thanks,
Lijo
> + }
> release_con:
> amdgpu_ras_set_context(adev, NULL);
> kfree(con);
More information about the amd-gfx
mailing list