[PATCH 1/3] drm/xe: evict user memory in PM notifier

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Apr 11 15:05:22 UTC 2025


On Thu, 2025-04-10 at 17:20 +0100, Matthew Auld wrote:
> In the case of VRAM we might need to allocate large amounts of
> GFP_KERNEL memory on suspend, however doing that directly in the
> driver .suspend()/.prepare() callback is not advisable (no swap, for
> example).
> 
> To improve on this we can instead hook up to the PM notifier
> framework, which is invoked at an earlier stage. We effectively call
> the evict routine twice: the notifier will hopefully have cleared out
> most, if not everything, by the time we call it a second time when
> entering the .suspend() callback. For s4 we also get the added
> benefit of allocating the system pages before the hibernation image
> size is calculated, which looks more sensible.
> 
> Note that the .suspend() hook is still responsible for dealing with
> all the pinned memory. Improving that is left to another patch.
> 
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1181
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4288
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4566
> Suggested-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>

Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
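
A note for readers less familiar with the notifier side of this: the PM
core invokes registered notifiers with PM_SUSPEND_PREPARE /
PM_HIBERNATION_PREPARE before any driver .prepare()/.suspend() callback
runs (and, for s4, before the hibernation image size is computed), which
is what makes the early eviction pass possible. A minimal standalone
sketch of the pattern, with made-up example_* names purely for
illustration:

#include <linux/notifier.h>
#include <linux/suspend.h>

/* Runs before any driver .prepare()/.suspend() callback, so large
 * GFP_KERNEL allocations made here can still fall back to swap. */
static int example_pm_notifier(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/* do the bulk of the memory-hungry work here */
		break;
	}

	/* Returning NOTIFY_BAD here instead aborts the suspend
	 * transition, which is what the patch does on eviction failure. */
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_pm_notifier,
};

/* register_pm_notifier(&example_nb) at init time, paired with
 * unregister_pm_notifier(&example_nb) on teardown, mirroring what the
 * patch does in xe_pm_init()/xe_pm_fini(). */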


> ---
>  drivers/gpu/drm/xe/xe_bo_evict.c     | 45 ++++++++++++++++-------
>  drivers/gpu/drm/xe/xe_bo_evict.h     |  1 +
>  drivers/gpu/drm/xe/xe_device_types.h |  3 ++
>  drivers/gpu/drm/xe/xe_pci.c          |  2 +-
>  drivers/gpu/drm/xe/xe_pm.c           | 55 ++++++++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_pm.h           |  2 +-
>  6 files changed, 84 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
> index 2bf74eb7f281..748360fd2439 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.c
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
> @@ -47,25 +47,17 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
>  }
>  
>  /**
> - * xe_bo_evict_all - evict all BOs from VRAM
> - *
> + * xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM
>   * @xe: xe device
>   *
> - * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
> - * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
> - * All eviction magic done via TTM calls.
> + * Evict non-pinned user BOs (via GPU).
>   *
>   * Evict == move VRAM BOs to temporary (typically system) memory.
> - *
> - * This function should be called before the device goes into a suspend state
> - * where the VRAM loses power.
>   */
> -int xe_bo_evict_all(struct xe_device *xe)
> +int xe_bo_evict_all_user(struct xe_device *xe)
>  {
>  	struct ttm_device *bdev = &xe->ttm;
> -	struct xe_tile *tile;
>  	u32 mem_type;
> -	u8 id;
>  	int ret;
>  
>  	/* User memory */
> @@ -91,9 +83,34 @@ int xe_bo_evict_all(struct xe_device *xe)
>  		}
>  	}
>  
> -	ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
> -				    &xe->pinned.late.external,
> -				    xe_bo_evict_pinned);
> +	return 0;
> +}
> +
> +/**
> + * xe_bo_evict_all - evict all BOs from VRAM
> + * @xe: xe device
> + *
> + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
> + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
> + * All eviction magic done via TTM calls.
> + *
> + * Evict == move VRAM BOs to temporary (typically system) memory.
> + *
> + * This function should be called before the device goes into a suspend state
> + * where the VRAM loses power.
> + */
> +int xe_bo_evict_all(struct xe_device *xe)
> +{
> +	struct xe_tile *tile;
> +	u8 id;
> +	int ret;
> +
> +	ret = xe_bo_evict_all_user(xe);
> +	if (ret)
> +		return ret;
> +
> +	ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
> +				    &xe->pinned.late.evicted, xe_bo_evict_pinned);
>  
>  	if (!ret)
>  		ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
> index d63eb3fc5cc9..e7f048634b32 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.h
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.h
> @@ -9,6 +9,7 @@
>  struct xe_device;
>  
>  int xe_bo_evict_all(struct xe_device *xe);
> +int xe_bo_evict_all_user(struct xe_device *xe);
>  int xe_bo_restore_early(struct xe_device *xe);
>  int xe_bo_restore_late(struct xe_device *xe);
>  
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 0369fc09c9da..495bc00ebed4 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -522,6 +522,9 @@ struct xe_device {
>  		struct mutex lock;
>  	} d3cold;
>  
> +	/** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
> +	struct notifier_block pm_notifier;
> +
>  	/** @pmt: Support the PMT driver callback interface */
>  	struct {
>  		/** @pmt.lock: protect access for telemetry data */
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 4fe7e0d941a9..a4caa6222b6f 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -747,7 +747,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>  		return;
>  
>  	xe_device_remove(xe);
> -	xe_pm_runtime_fini(xe);
> +	xe_pm_fini(xe);
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index aaba2a97bb3a..e7ea4003dbf8 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -282,6 +282,29 @@ static u32 vram_threshold_value(struct xe_device *xe)
>  	return DEFAULT_VRAM_THRESHOLD;
>  }
>  
> +static int xe_pm_notifier_callback(struct notifier_block *nb,
> +				   unsigned long action, void *data)
> +{
> +	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
> +	int err = 0;
> +
> +	switch (action) {
> +	case PM_HIBERNATION_PREPARE:
> +	case PM_SUSPEND_PREPARE:
> +		xe_pm_runtime_get(xe);
> +		err = xe_bo_evict_all_user(xe);
> +		xe_pm_runtime_put(xe);
> +		if (err)
> +			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
> +		break;
> +	}
> +
> +	if (err)
> +		return NOTIFY_BAD;
> +
> +	return NOTIFY_DONE;
> +}
> +
>  /**
>   * xe_pm_init - Initialize Xe Power Management
>   * @xe: xe device instance
> @@ -295,6 +318,11 @@ int xe_pm_init(struct xe_device *xe)
>  	u32 vram_threshold;
>  	int err;
>  
> +	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
> +	err = register_pm_notifier(&xe->pm_notifier);
> +	if (err)
> +		return err;
> +
>  	/* For now suspend/resume is only allowed with GuC */
>  	if (!xe_device_uc_enabled(xe))
>  		return 0;
> @@ -304,24 +332,23 @@ int xe_pm_init(struct xe_device *xe)
>  	if (xe->d3cold.capable) {
>  		err = xe_device_sysfs_init(xe);
>  		if (err)
> -			return err;
> +			goto err_unregister;
>  
>  		vram_threshold = vram_threshold_value(xe);
>  		err = xe_pm_set_vram_threshold(xe, vram_threshold);
>  		if (err)
> -			return err;
> +			goto err_unregister;
>  	}
>  
>  	xe_pm_runtime_init(xe);
> -
>  	return 0;
> +
> +err_unregister:
> +	unregister_pm_notifier(&xe->pm_notifier);
> +	return err;
>  }
>  
> -/**
> - * xe_pm_runtime_fini - Finalize Runtime PM
> - * @xe: xe device instance
> - */
> -void xe_pm_runtime_fini(struct xe_device *xe)
> +static void xe_pm_runtime_fini(struct xe_device *xe)
>  {
>  	struct device *dev = xe->drm.dev;
>  
> @@ -329,6 +356,18 @@ void xe_pm_runtime_fini(struct xe_device *xe)
>  	pm_runtime_forbid(dev);
>  }
>  
> +/**
> + * xe_pm_fini - Finalize PM
> + * @xe: xe device instance
> + */
> +void xe_pm_fini(struct xe_device *xe)
> +{
> +	if (xe_device_uc_enabled(xe))
> +		xe_pm_runtime_fini(xe);
> +
> +	unregister_pm_notifier(&xe->pm_notifier);
> +}
> +
>  static void xe_pm_write_callback_task(struct xe_device *xe,
>  				      struct task_struct *task)
>  {
> diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
> index 998d1ed64556..59678b310e55 100644
> --- a/drivers/gpu/drm/xe/xe_pm.h
> +++ b/drivers/gpu/drm/xe/xe_pm.h
> @@ -17,7 +17,7 @@ int xe_pm_resume(struct xe_device *xe);
>  
>  int xe_pm_init_early(struct xe_device *xe);
>  int xe_pm_init(struct xe_device *xe);
> -void xe_pm_runtime_fini(struct xe_device *xe);
> +void xe_pm_fini(struct xe_device *xe);
>  bool xe_pm_runtime_suspended(struct xe_device *xe);
>  int xe_pm_runtime_suspend(struct xe_device *xe);
>  int xe_pm_runtime_resume(struct xe_device *xe);