[PATCH v3 06/13] drm/xe: Add callback support for driver remove

Lucas De Marchi lucas.demarchi at intel.com
Wed Feb 12 05:45:38 UTC 2025


On Mon, Feb 10, 2025 at 12:41:46PM -0500, Rodrigo Vivi wrote:
>On Fri, Feb 07, 2025 at 02:19:38PM -0800, Lucas De Marchi wrote:
>> xe device probe uses devm cleanup in most places. However there are a
>> few that are not possible: when the driver interacts with other
>
>"few cases where this is not possible" ?!
>
>> subsystems that require the cleanup to happen before the device being
>> removed from the bus. One example is the component_* APIs used by
>> xe_gsc_proxy and display.
>>
>> Add a callback-based remove so the exceptions don't make the probe
>> use multiple error handling styles.
>>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>> ---
>>  drivers/gpu/drm/xe/xe_device.c               | 59 ++++++++++++++++++++
>>  drivers/gpu/drm/xe/xe_device.h               |  4 ++
>>  drivers/gpu/drm/xe/xe_device_remove_action.h | 24 ++++++++
>>  drivers/gpu/drm/xe/xe_device_types.h         | 15 +++++
>>  drivers/gpu/drm/xe/xe_pci.c                  |  4 +-
>>  5 files changed, 105 insertions(+), 1 deletion(-)
>>  create mode 100644 drivers/gpu/drm/xe/xe_device_remove_action.h
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>> index 90275531653fe..5fc4e696262f9 100644
>> --- a/drivers/gpu/drm/xe/xe_device.c
>> +++ b/drivers/gpu/drm/xe/xe_device.c
>> @@ -747,6 +747,9 @@ int xe_device_probe(struct xe_device *xe)
>>  	u8 last_gt;
>>  	u8 id;
>>
>> +	xe->probing = true;
>> +	INIT_LIST_HEAD(&xe->remove_action_list);
>> +
>>  	xe_pat_init_early(xe);
>>
>>  	err = xe_sriov_init(xe);
>> @@ -892,6 +895,8 @@ int xe_device_probe(struct xe_device *xe)
>>
>>  	xe_vsec_init(xe);
>>
>> +	xe->probing = false;
>> +
>>  	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
>>
>>  err_fini_display:
>> @@ -911,6 +916,58 @@ int xe_device_probe(struct xe_device *xe)
>>  	return err;
>>  }
>>
>> +/**
>> + * xe_device_call_remove_actions - Call the remove actions
>> + * @xe: xe device instance
>> + *
>> + * This is only to be used by xe_pci and xe_device to call the remove actions
>> + * while removing the driver or handling probe failures.
>> + */
>> +void xe_device_call_remove_actions(struct xe_device *xe)
>> +{
>> +	struct xe_device_remove_action *ra;
>> +
>> +	list_for_each_entry(ra, &xe->remove_action_list, node)
>> +		ra->remove(ra);
>> +
>> +	xe->probing = false;
>> +}
>> +
>> +/**
>> + * xe_device_add_remove_action - Add an action to run on driver removal
>> + * @xe: xe device instance
>> + * @ra: pointer to the object embedded into the object to cleanup
>> + * @remove: function to execute. The @ra is passed as argument
>> + *
>> + * Example:
>> + *
>> + * .. code-block:: c
>> + *
>> + *	static void foo_remove(struct xe_device_remove_action *ra)
>> + *	{
>> + *		struct xe_foo *foo = container_of(ra, struct xe_foo, remove_action);
>> + *		...
>> + *	}
>> + *
>> + *	int xe_foo_init(struct xe_foo *foo)
>> + *	{
>> + *		...
>> + *		xe_device_add_remove_action(xe, &foo->remove_action, foo_remove);
>> + *		...
>> + *		return 0;
>> + *	};
>
>Although the cover letter mentions that this should be the exception, the
>documentation here doesn't make that so clear.
>
>I believe we should be clearer about which cases this structure is aimed at,
>and give some basic rules on when to use it instead of devm or drmm.

ok.. I went back to double-check the cases in which this is really,
really needed, and the only one is apparently the interaction with the
component driver: devres currently does not handle devres group removal
well while all resources are being removed.  I have a patch for that, but
I think I will have to do it in parallel to what's being done here.

CI uncovered one more issue, since xe_display_fini() may also do
component_del() on ADL-P... I will move this patch to be the first one,
fix up the xe_display part and re-submit.  In parallel I'm submitting the
improvements in drivers/base/{component,devres}.c to the relevant mailing
list.

>
>And probably even keep that comment where it is used with the GSC code.
>
>But other than that, the code and the approach looks good to me.

thanks
Lucas De Marchi

>
>> + */
>> +void xe_device_add_remove_action(struct xe_device *xe,
>> +				 struct xe_device_remove_action *ra,
>> +				 void (*remove)(struct xe_device_remove_action *ra))
>> +{
>> +	drm_WARN_ON(&xe->drm, !xe->probing);
>> +
>> +	INIT_LIST_HEAD(&ra->node);
>> +	ra->remove = remove;
>> +	list_add(&ra->node, &xe->remove_action_list);
>> +}
>> +
>>  static void xe_device_remove_display(struct xe_device *xe)
>>  {
>>  	xe_display_unregister(xe);
>> @@ -934,6 +991,8 @@ void xe_device_remove(struct xe_device *xe)
>>
>>  	for_each_gt(gt, xe, id)
>>  		xe_gt_remove(gt);
>> +
>> +	xe_device_call_remove_actions(xe);
>>  }
>>
>>  void xe_device_shutdown(struct xe_device *xe)
>> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
>> index fc3c2af3fb7fd..3fecf865957b0 100644
>> --- a/drivers/gpu/drm/xe/xe_device.h
>> +++ b/drivers/gpu/drm/xe/xe_device.h
>> @@ -45,6 +45,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>>  				   const struct pci_device_id *ent);
>>  int xe_device_probe_early(struct xe_device *xe);
>>  int xe_device_probe(struct xe_device *xe);
>> +void xe_device_add_remove_action(struct xe_device *xe,
>> +				 struct xe_device_remove_action *ra,
>> +				 void (*remove)(struct xe_device_remove_action *ra));
>> +void xe_device_call_remove_actions(struct xe_device *xe);
>>  void xe_device_remove(struct xe_device *xe);
>>  void xe_device_shutdown(struct xe_device *xe);
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device_remove_action.h b/drivers/gpu/drm/xe/xe_device_remove_action.h
>> new file mode 100644
>> index 0000000000000..e0322c4660dda
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_device_remove_action.h
>> @@ -0,0 +1,24 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +
>> +#ifndef _XE_DEVICE_REMOVE_ACTION_H_
>> +#define _XE_DEVICE_REMOVE_ACTION_H_
>> +
>> +#include <linux/list.h>
>> +
>> +/**
>> + * struct xe_device_remove_action - Action item to run on driver removal
>> + *
>> + * This should be used like a list_head, embedding it into structures of the
>> + * individual parts being initialized. Once the remove action is ready to be
>> + * added, call xe_device_add_remove_action() to initialize and use this struct.
>> + */
>> +struct xe_device_remove_action {
>> +	/* private: */
>> +	struct list_head node;
>> +	void (*remove)(struct xe_device_remove_action *ra);
>> +};
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index c0e886bac1831..4c902e0cb4ba9 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -13,6 +13,7 @@
>>  #include <drm/ttm/ttm_device.h>
>>
>>  #include "xe_devcoredump_types.h"
>> +#include "xe_device_remove_action.h"
>>  #include "xe_heci_gsc.h"
>>  #include "xe_lmtt_types.h"
>>  #include "xe_memirq_types.h"
>> @@ -428,6 +429,20 @@ struct xe_device {
>>  	/** @tiles: device tiles */
>>  	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
>>
>> +	/**
>> +	 * @remove_action_list: list of actions to execute on device remove.
>> +	 * Use xe_device_add_remove_action() for that. Actions can only be added
>> +	 * during probe and are executed during the call from PCI subsystem to
>> +	 * remove the driver from the device.
>> +	 */
>> +	struct list_head remove_action_list;
>> +
>> +	/**
>> +	 * @probing: cover the section in which @remove_action_list can be used
>> +	 * to post cleaning actions
>> +	 */
>> +	bool probing;
>> +
>>  	/**
>>  	 * @mem_access: keep track of memory access in the device, possibly
>>  	 * triggering additional actions when they occur.
>> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>> index 6a8e82aff3853..70b697fde5b96 100644
>> --- a/drivers/gpu/drm/xe/xe_pci.c
>> +++ b/drivers/gpu/drm/xe/xe_pci.c
>> @@ -905,8 +905,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>  		return err;
>>
>>  	err = xe_device_probe(xe);
>> -	if (err)
>> +	if (err) {
>> +		xe_device_call_remove_actions(xe);
>>  		return err;
>> +	}
>>
>>  	err = xe_pm_init(xe);
>>  	if (err)
>> --
>> 2.48.1
>>


More information about the Intel-xe mailing list