[PATCH] Revert "drm/xe: make gt_remove use devm"

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Tue May 28 18:27:32 UTC 2024



On 5/28/2024 11:23 AM, Daniele Ceraolo Spurio wrote:
> The gt_remove function was explicitly added as part of the remove flow
> instead of using drmm/devm automatic cleanup due to it being illegal
> to remove a component after the driver has been detached from the pci
> device; the GSC proxy component is removed as part of gt_remove, so we
> need to do it in the pci cleanup flow. The function already has a
> comment above it to explain this.
>
> Note that the change to use the devm also caused an invalid pointer
> deref in the gsc_proxy unbind function, but I didn't bother to debug
> which pointer was bad since we shouldn't be calling the unbind that
> late anyway and this revert fixes it.

Here is the bad pointer deref log in case anyone wants to have a better 
look:

https://intel-gfx-ci.01.org/tree/intel-xe/xe-pw-134099v1/shard-lnl-8/igt@xe_module_load@reload.html

Daniele

>
> Both issues were not seen in CI because the GSC loading is temporarily
> disabled due to a critical bug, which means we're not binding the
> component.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Andrzej Hajda <andrzej.hajda at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_device.c | 22 ++++++++++++++++++++--
>   drivers/gpu/drm/xe/xe_gt.c     | 16 +++++++++-------
>   drivers/gpu/drm/xe/xe_gt.h     |  1 +
>   3 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 85c1b0c406a6..4c44f23e58ea 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -549,6 +549,7 @@ int xe_device_probe(struct xe_device *xe)
>   	struct xe_tile *tile;
>   	struct xe_gt *gt;
>   	int err;
> +	u8 last_gt;
>   	u8 id;
>   
>   	xe_pat_init_early(xe);
> @@ -647,16 +648,18 @@ int xe_device_probe(struct xe_device *xe)
>   		goto err_irq_shutdown;
>   
>   	for_each_gt(gt, xe, id) {
> +		last_gt = id;
> +
>   		err = xe_gt_init(gt);
>   		if (err)
> -			goto err_irq_shutdown;
> +			goto err_fini_gt;
>   	}
>   
>   	xe_heci_gsc_init(xe);
>   
>   	err = xe_display_init(xe);
>   	if (err)
> -		goto err_irq_shutdown;
> +		goto err_fini_gt;
>   
>   	err = drm_dev_register(&xe->drm, 0);
>   	if (err)
> @@ -672,6 +675,15 @@ int xe_device_probe(struct xe_device *xe)
>   
>   err_fini_display:
>   	xe_display_driver_remove(xe);
> +
> +err_fini_gt:
> +	for_each_gt(gt, xe, id) {
> +		if (id < last_gt)
> +			xe_gt_remove(gt);
> +		else
> +			break;
> +	}
> +
>   err_irq_shutdown:
>   	xe_irq_shutdown(xe);
>   err:
> @@ -689,12 +701,18 @@ static void xe_device_remove_display(struct xe_device *xe)
>   
>   void xe_device_remove(struct xe_device *xe)
>   {
> +	struct xe_gt *gt;
> +	u8 id;
> +
>   	xe_device_remove_display(xe);
>   
>   	xe_display_fini(xe);
>   
>   	xe_heci_gsc_fini(xe);
>   
> +	for_each_gt(gt, xe, id)
> +		xe_gt_remove(gt);
> +
>   	xe_irq_shutdown(xe);
>   }
>   
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 6f4b59a6e710..98c2228b51d0 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -93,14 +93,16 @@ void xe_gt_sanitize(struct xe_gt *gt)
>   	gt->uc.guc.submission_state.enabled = false;
>   }
>   
> -/*
> - * Clean up the GT structures before driver removal. This function should only
> - * act on objects/structures that must be cleaned before the driver removal
> - * callback is complete and therefore can't be deferred to a drmm action.
> +/**
> + * xe_gt_remove() - Clean up the GT structures before driver removal
> + * @gt: the GT object
> + *
> + * This function should only act on objects/structures that must be cleaned
> + * before the driver removal callback is complete and therefore can't be
> + * deferred to a drmm action.
>    */
> -static void gt_remove(void *arg)
> +void xe_gt_remove(struct xe_gt *gt)
>   {
> -	struct xe_gt *gt = arg;
>   	int i;
>   
>   	xe_uc_remove(&gt->uc);
> @@ -566,7 +568,7 @@ int xe_gt_init(struct xe_gt *gt)
>   
>   	xe_gt_record_user_engines(gt);
>   
> -	return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, gt_remove, gt);
> +	return 0;
>   }
>   
>   void xe_gt_record_user_engines(struct xe_gt *gt)
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index d0747edfe020..9073ac68a777 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -56,6 +56,7 @@ int xe_gt_suspend(struct xe_gt *gt);
>   int xe_gt_resume(struct xe_gt *gt);
>   void xe_gt_reset_async(struct xe_gt *gt);
>   void xe_gt_sanitize(struct xe_gt *gt);
> +void xe_gt_remove(struct xe_gt *gt);
>   
>   /**
>    * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the



More information about the Intel-xe mailing list