[PATCH v2 2/7] drm/xe: split pinned save/restore into phases

K V P, Satyanarayana satyanarayana.k.v.p at intel.com
Thu Jan 23 07:54:47 UTC 2025


> -----Original Message-----
> From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> Matthew Auld
> Sent: Wednesday, December 18, 2024 5:49 PM
> To: intel-xe at lists.freedesktop.org
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>; Brost, Matthew
> <matthew.brost at intel.com>
> Subject: [PATCH v2 2/7] drm/xe: split pinned save/restore into phases
> 
> In the early phase we can only use memcpy to bootstrap enough of the
> system to get working submission. For the late phase we can switch over
> to using the GPU blitter to do the copy. As part of this, move the user
> pinned stuff into the late phase. In the future we can move more kernel
> pinned stuff into the late phase.
> 
> v2:
>  - Invert ordering in evict_pinned when skipping; should be opposite of
>    restore.
>  - Fix kernel-doc.
> 
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> ---
Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>

>  drivers/gpu/drm/xe/tests/xe_bo.c     |   8 +-
>  drivers/gpu/drm/xe/xe_bo.c           |   7 +-
>  drivers/gpu/drm/xe/xe_bo.h           |   1 +
>  drivers/gpu/drm/xe/xe_bo_evict.c     | 220 ++++++++++++---------------
>  drivers/gpu/drm/xe/xe_bo_evict.h     |   4 +-
>  drivers/gpu/drm/xe/xe_device.c       |   3 +-
>  drivers/gpu/drm/xe/xe_device_types.h |   6 +-
>  drivers/gpu/drm/xe/xe_pm.c           |  16 +-
>  8 files changed, 122 insertions(+), 143 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> index c9ec7a313c6b..7619eba2ef1f 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -252,7 +252,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
> 
>  		for_each_gt(__gt, xe, id)
>  			xe_gt_sanitize(__gt);
> -		err = xe_bo_restore_kernel(xe);
> +		err = xe_bo_restore_pinned_early(xe);
>  		/*
>  		 * Snapshotting the CTB and copying back a potentially old
>  		 * version seems risky, depending on what might have been
> @@ -269,14 +269,14 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
>  			flush_work(&__gt->reset.worker);
>  		}
>  		if (err) {
> -			KUNIT_FAIL(test, "restore kernel err=%pe\n",
> +			KUNIT_FAIL(test, "restore early err=%pe\n",
>  				   ERR_PTR(err));
>  			goto cleanup_all;
>  		}
> 
> -		err = xe_bo_restore_user(xe);
> +		err = xe_bo_restore_pinned_late(xe);
>  		if (err) {
> -			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
> +			KUNIT_FAIL(test, "restore late err=%pe\n", ERR_PTR(err));
>  			goto cleanup_all;
>  		}
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index fa2d74a56283..c0515c1dd00c 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -124,7 +124,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
>  		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
>  }
> 
> -static bool xe_bo_is_user(struct xe_bo *bo)
> +bool xe_bo_is_user(struct xe_bo *bo)
>  {
>  	return bo->flags & XE_BO_FLAG_USER;
>  }
> @@ -1885,8 +1885,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
> 
>  		if (xe_bo_is_vram(bo)) {
>  			spin_lock(&xe->pinned.lock);
> -			list_add_tail(&bo->pinned_link,
> -				      &xe->pinned.external_vram);
> +			list_add_tail(&bo->pinned_link, &xe->pinned.present);
>  			spin_unlock(&xe->pinned.lock);
>  		}
>  	}
> @@ -1930,7 +1929,7 @@ int xe_bo_pin(struct xe_bo *bo)
> 
>  	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
>  		spin_lock(&xe->pinned.lock);
> -		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
> +		list_add_tail(&bo->pinned_link, &xe->pinned.present);
>  		spin_unlock(&xe->pinned.lock);
>  	}
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index d9386ab03140..a0ac2b9c903d 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -235,6 +235,7 @@ bool xe_bo_is_vram(struct xe_bo *bo);
>  bool xe_bo_is_stolen(struct xe_bo *bo);
>  bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
>  bool xe_bo_has_single_placement(struct xe_bo *bo);
> +bool xe_bo_is_user(struct xe_bo *bo);
>  uint64_t vram_region_gpu_offset(struct ttm_resource *res);
> 
>  bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
> index 96764d8169f4..119b8301bd7f 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.c
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
> @@ -10,6 +10,49 @@
>  #include "xe_ggtt.h"
>  #include "xe_tile.h"
> 
> +static int xe_evict_pinned(struct xe_device *xe, bool memcpy_only)
> +{
> +	struct xe_tile *tile;
> +	LIST_HEAD(skipped);
> +	struct xe_bo *bo;
> +	u8 id;
> +	int ret = 0;
> +
> +	spin_lock(&xe->pinned.lock);
> +	for (;;) {
> +		bo = list_first_entry_or_null(&xe->pinned.present, typeof(*bo), pinned_link);
> +		if (!bo)
> +			break;
> +
> +		if (!memcpy_only && !xe_bo_is_user(bo)) {
> +			list_move_tail(&bo->pinned_link, &skipped);
> +			continue;
> +		}
> +
> +		xe_bo_get(bo);
> +		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
> +		spin_unlock(&xe->pinned.lock);
> +
> +		ret = xe_bo_evict_pinned(bo);
> +		xe_bo_put(bo);
> +		spin_lock(&xe->pinned.lock);
> +		if (ret) {
> +			list_move_tail(&bo->pinned_link, &xe->pinned.present);
> +			break;
> +		}
> +	}
> +
> +	list_splice_tail(&skipped, &xe->pinned.present);
> +	spin_unlock(&xe->pinned.lock);
> +
> +	if (!memcpy_only) {
> +		for_each_tile(tile, xe, id)
> +			xe_tile_migrate_wait(tile);
> +	}
> +
> +	return ret;
> +}
> +
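
The skip-then-splice walk above is worth spelling out: in the blitter pass
(memcpy_only == false) kernel-only BOs are parked on a local list and spliced
back onto the source list so the following memcpy pass can still find them.
Here is a minimal userspace model of the two-pass walk, for anyone following
along. The BO names, is_user flags and array-backed lists are invented
stand-ins for illustration, not the driver's data structures:

/* Toy two-pass eviction walk; compile with: cc -o evict evict.c */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

#define N 4

/* Stand-in "pinned BOs": names and user flags are invented examples. */
static const char *bo_name[N] = { "guc_ctb", "user_fb", "ggtt_pt", "user_buf" };
static const bool bo_is_user[N] = { false, true, false, true };

static void evict_pinned(int present[], int *npresent,
			 int evicted[], int *nevicted, bool memcpy_only)
{
	int skipped[N], nskipped = 0;

	for (int i = 0; i < *npresent; i++) {
		int bo = present[i];

		/* Blitter pass: park kernel-only BOs for the memcpy pass. */
		if (!memcpy_only && !bo_is_user[bo]) {
			skipped[nskipped++] = bo;
			continue;
		}

		printf("evict %-8s via %s\n", bo_name[bo],
		       memcpy_only ? "memcpy" : "blitter");
		evicted[(*nevicted)++] = bo;
	}

	/* Skipped BOs were never evicted: splice them back onto "present". */
	memcpy(present, skipped, nskipped * sizeof(int));
	*npresent = nskipped;
}

int main(void)
{
	int present[N] = { 0, 1, 2, 3 }, npresent = N;
	int evicted[N], nevicted = 0;

	evict_pinned(present, &npresent, evicted, &nevicted, false); /* blittable BOs */
	evict_pinned(present, &npresent, evicted, &nevicted, true);  /* memcpy-only rest */
	return 0;
}

The first pass picks up only the user BOs; the second sweeps the memcpy-only
remainder, which is why the skipped entries have to go back to the source list.
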
>  /**
>   * xe_bo_evict_all - evict all BOs from VRAM
>   *
> @@ -27,11 +70,7 @@
>  int xe_bo_evict_all(struct xe_device *xe)
>  {
>  	struct ttm_device *bdev = &xe->ttm;
> -	struct xe_bo *bo;
> -	struct xe_tile *tile;
> -	struct list_head still_in_list;
>  	u32 mem_type;
> -	u8 id;
>  	int ret;
> 
>  	/* User memory */
> @@ -57,92 +96,57 @@ int xe_bo_evict_all(struct xe_device *xe)
>  		}
>  	}
> 
> -	/* Pinned user memory in VRAM */
> -	INIT_LIST_HEAD(&still_in_list);
> -	spin_lock(&xe->pinned.lock);
> -	for (;;) {
> -		bo = list_first_entry_or_null(&xe->pinned.external_vram,
> -					      typeof(*bo), pinned_link);
> -		if (!bo)
> -			break;
> -		xe_bo_get(bo);
> -		list_move_tail(&bo->pinned_link, &still_in_list);
> -		spin_unlock(&xe->pinned.lock);
> -
> -		ret = xe_bo_evict_pinned(bo);
> -		xe_bo_put(bo);
> -		if (ret) {
> -			spin_lock(&xe->pinned.lock);
> -			list_splice_tail(&still_in_list,
> -					 &xe->pinned.external_vram);
> -			spin_unlock(&xe->pinned.lock);
> -			return ret;
> -		}
> -
> -		spin_lock(&xe->pinned.lock);
> -	}
> -	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
> -	spin_unlock(&xe->pinned.lock);
> -
> -	/*
> -	 * Wait for all user BO to be evicted as those evictions depend on the
> -	 * memory moved below.
> -	 */
> -	for_each_tile(tile, xe, id)
> -		xe_tile_migrate_wait(tile);
> -
> -	spin_lock(&xe->pinned.lock);
> -	for (;;) {
> -		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
> -					      typeof(*bo), pinned_link);
> -		if (!bo)
> -			break;
> -		xe_bo_get(bo);
> -		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
> -		spin_unlock(&xe->pinned.lock);
> -
> -		ret = xe_bo_evict_pinned(bo);
> -		xe_bo_put(bo);
> -		if (ret)
> -			return ret;
> -
> -		spin_lock(&xe->pinned.lock);
> -	}
> -	spin_unlock(&xe->pinned.lock);
> +	/* Pinned user memory in VRAM and anything else that can be blitted */
> +	ret = xe_evict_pinned(xe, false);
> +	if (ret)
> +		return ret;
> 
> -	return 0;
> +	/* Pinned kernel memory that must use memcpy only */
> +	return xe_evict_pinned(xe, true);
>  }
> 
>  /**
> - * xe_bo_restore_kernel - restore kernel BOs to VRAM
> + * xe_restore_pinned - restore pinned BOs to VRAM
>   *
>   * @xe: xe device
> + * @memcpy_only: skip objects that don't need memcpy
>   *
> - * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
> + * Move BOs from temporary (typically system) memory to VRAM. All
>   * moves done via TTM calls.
>   *
> - * This function should be called early, before trying to init the GT, on device
> - * resume.
> + * If this function is called early on device resume, before trying to init
> + * the GT, then @memcpy_only should be set.
>   */
> -int xe_bo_restore_kernel(struct xe_device *xe)
> +static int xe_restore_pinned(struct xe_device *xe, bool memcpy_only)
>  {
> +	struct xe_tile *tile;
> +	LIST_HEAD(skipped);
>  	struct xe_bo *bo;
> -	int ret;
> +	u8 id;
> +	int ret = 0;
> 
>  	spin_lock(&xe->pinned.lock);
>  	for (;;) {
> -		bo = list_first_entry_or_null(&xe->pinned.evicted,
> -					      typeof(*bo), pinned_link);
> +		bo = list_first_entry_or_null(&xe->pinned.evicted, typeof(*bo),
> +					      pinned_link);
>  		if (!bo)
>  			break;
> +
> +		if (memcpy_only && xe_bo_is_user(bo)) {
> +			list_move_tail(&bo->pinned_link, &skipped);
> +			continue;
> +		}
> +
>  		xe_bo_get(bo);
> -		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
> +		list_move_tail(&bo->pinned_link, &xe->pinned.present);
>  		spin_unlock(&xe->pinned.lock);
> 
>  		ret = xe_bo_restore_pinned(bo);
>  		if (ret) {
> +			spin_lock(&xe->pinned.lock);
> +			list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
>  			xe_bo_put(bo);
> -			return ret;
> +			break;
>  		}
> 
>  		if (bo->flags & XE_BO_FLAG_GGTT) {
> @@ -159,72 +163,50 @@ int xe_bo_restore_kernel(struct xe_device *xe)
>  			}
>  		}
> 
> -		/*
> -		 * We expect validate to trigger a move VRAM and our move code
> -		 * should setup the iosys map.
> -		 */
> -		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
> -
>  		xe_bo_put(bo);
> 
>  		spin_lock(&xe->pinned.lock);
>  	}
> +
> +	list_splice_tail(&skipped, &xe->pinned.evicted);
>  	spin_unlock(&xe->pinned.lock);
> 
> -	return 0;
> +	if (!memcpy_only) {
> +		for_each_tile(tile, xe, id)
> +			xe_tile_migrate_wait(tile);
> +	}
> +
> +	return ret;
>  }
> 
>  /**
> - * xe_bo_restore_user - restore pinned user BOs to VRAM
> + * xe_bo_restore_pinned_early - restore kernel BOs to VRAM during early resume,
> + * before we have working migration/GuC, using memcpy
>   *
>   * @xe: xe device
>   *
> - * Move pinned user BOs from temporary (typically system) memory to VRAM via
> - * CPU. All moves done via TTM calls.
> + * Copy kernel BOs from temporary (typically system) memory to VRAM via CPU. All
> + * moves done via TTM calls.
>   *
> - * This function should be called late, after GT init, on device resume.
> + * This function should be called early, before trying to init the GT, on device
> + * resume.
>   */
> -int xe_bo_restore_user(struct xe_device *xe)
> +int xe_bo_restore_pinned_early(struct xe_device *xe)
>  {
> -	struct xe_bo *bo;
> -	struct xe_tile *tile;
> -	struct list_head still_in_list;
> -	u8 id;
> -	int ret;
> -
> -	if (!IS_DGFX(xe))
> -		return 0;
> -
> -	/* Pinned user memory in VRAM should be validated on resume */
> -	INIT_LIST_HEAD(&still_in_list);
> -	spin_lock(&xe->pinned.lock);
> -	for (;;) {
> -		bo = list_first_entry_or_null(&xe->pinned.external_vram,
> -					      typeof(*bo), pinned_link);
> -		if (!bo)
> -			break;
> -		list_move_tail(&bo->pinned_link, &still_in_list);
> -		xe_bo_get(bo);
> -		spin_unlock(&xe->pinned.lock);
> -
> -		ret = xe_bo_restore_pinned(bo);
> -		xe_bo_put(bo);
> -		if (ret) {
> -			spin_lock(&xe->pinned.lock);
> -			list_splice_tail(&still_in_list,
> -					 &xe->pinned.external_vram);
> -			spin_unlock(&xe->pinned.lock);
> -			return ret;
> -		}
> -
> -		spin_lock(&xe->pinned.lock);
> -	}
> -	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
> -	spin_unlock(&xe->pinned.lock);
> -
> -	/* Wait for restore to complete */
> -	for_each_tile(tile, xe, id)
> -		xe_tile_migrate_wait(tile);
> +	return xe_restore_pinned(xe, true);
> +}
> 
> -	return 0;
> +/**
> + * xe_bo_restore_pinned_late - restore pinned BOs to VRAM using GPU copy
> + *
> + * @xe: xe device
> + *
> + * Copy pinned BOs from temporary (typically system) memory to VRAM using the
> + * GPU blitter. All moves done via TTM calls.
> + *
> + * This function should be called late, once submission is working.
> + */
> +int xe_bo_restore_pinned_late(struct xe_device *xe)
> +{
> +	return xe_restore_pinned(xe, false);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
> index 746894798852..094f69e69cdb 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.h
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.h
> @@ -9,7 +9,7 @@
>  struct xe_device;
> 
>  int xe_bo_evict_all(struct xe_device *xe);
> -int xe_bo_restore_kernel(struct xe_device *xe);
> -int xe_bo_restore_user(struct xe_device *xe);
> +int xe_bo_restore_pinned_early(struct xe_device *xe);
> +int xe_bo_restore_pinned_late(struct xe_device *xe);
> 
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index bf36e4fb4679..14b691753ac8 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -349,8 +349,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>  	}
> 
>  	spin_lock_init(&xe->pinned.lock);
> -	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
> -	INIT_LIST_HEAD(&xe->pinned.external_vram);
> +	INIT_LIST_HEAD(&xe->pinned.present);
>  	INIT_LIST_HEAD(&xe->pinned.evicted);
> 
>  	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 8a7b15972413..aacf2eb37ac3 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -405,12 +405,10 @@ struct xe_device {
>  	struct {
>  		/** @pinned.lock: protected pinned BO list state */
>  		spinlock_t lock;
> -		/** @pinned.kernel_bo_present: pinned kernel BO that are present */
> -		struct list_head kernel_bo_present;
> +		/** @pinned.present: pinned BO that are present */
> +		struct list_head present;
>  		/** @pinned.evicted: pinned BO that have been evicted */
>  		struct list_head evicted;
> -		/** @pinned.external_vram: pinned external BO in vram*/
> -		struct list_head external_vram;
>  	} pinned;
> 
>  	/** @ufence_wq: user fence wait queue */
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index a6761cb769b2..5a3c3436c491 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -179,7 +179,7 @@ int xe_pm_resume(struct xe_device *xe)
>  	 * This only restores pinned memory which is the memory required for the
>  	 * GT(s) to resume.
>  	 */
> -	err = xe_bo_restore_kernel(xe);
> +	err = xe_bo_restore_pinned_early(xe);
>  	if (err)
>  		goto err;
> 
> @@ -188,12 +188,12 @@ int xe_pm_resume(struct xe_device *xe)
>  	for_each_gt(gt, xe, id)
>  		xe_gt_resume(gt);
> 
> -	xe_display_pm_resume(xe);
> -
> -	err = xe_bo_restore_user(xe);
> +	err = xe_bo_restore_pinned_late(xe);
>  	if (err)
>  		goto err;
> 
> +	xe_display_pm_resume(xe);
> +
>  	drm_dbg(&xe->drm, "Device resumed\n");
>  	return 0;
>  err:
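
For reference, the resulting resume ordering can be modelled as below. This is
a simplified sketch, not driver code: the stubs and printouts are invented,
and only the ordering (early memcpy restore, GT/GuC resume, late blitter
restore, then display) mirrors what the patch does:

/* Simplified model of the xe_pm_resume() ordering after this patch. */
#include <stdio.h>

static int restore_pinned(int memcpy_only)
{
	/* Early pass uses CPU memcpy; late pass can use the GPU blitter. */
	printf("restore pinned (%s)\n", memcpy_only ? "memcpy" : "blitter");
	return 0;
}

int main(void)
{
	int err;

	err = restore_pinned(1);	/* xe_bo_restore_pinned_early() */
	if (err)
		return err;

	printf("GT/GuC resume; submission now works\n");

	err = restore_pinned(0);	/* xe_bo_restore_pinned_late() */
	if (err)
		return err;

	printf("display resume\n");
	return 0;
}
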
> @@ -453,7 +453,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>  		 * This only restores pinned memory which is the memory
>  		 * required for the GT(s) to resume.
>  		 */
> -		err = xe_bo_restore_kernel(xe);
> +		err = xe_bo_restore_pinned_early(xe);
>  		if (err)
>  			goto out;
>  	}
> @@ -463,14 +463,14 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>  	for_each_gt(gt, xe, id)
>  		xe_gt_resume(gt);
> 
> -	xe_display_pm_runtime_resume(xe);
> -
>  	if (xe->d3cold.allowed) {
> -		err = xe_bo_restore_user(xe);
> +		err = xe_bo_restore_pinned_late(xe);
>  		if (err)
>  			goto out;
>  	}
> 
> +	xe_display_pm_runtime_resume(xe);
> +
>  out:
>  	xe_rpm_lockmap_release(xe);
>  	xe_pm_write_callback_task(xe, NULL);
> --
> 2.47.1


