[PATCH v4 1/7] drm/xe: use backup object for pinned save/restore

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Mar 28 15:28:19 UTC 2025


Hi, Matthew, a partly unrelated question below.

On Wed, 2025-03-26 at 18:19 +0000, Matthew Auld wrote:
> Currently we move pinned objects, relying on the fact that the
> lpfn/fpfn will force the placement to occupy the same pages when
> restoring. However, this then limits all such pinned objects to be
> contig underneath. In addition, it is likely a little fragile to move
> pinned objects in the first place. Rather than moving such objects,
> instead copy the page contents to a secondary system memory object;
> that way the VRAM pages never move and remain pinned. This also opens
> the door for eventually having non-contig pinned objects that can
> also be saved/restored using the blitter.
> 
> v2:
>  - Make sure to drop the fence ref.
>  - Handle NULL bo->migrate.
> v3:
>  - Ensure we add the copy fence to the BOs, otherwise backup_obj can
>    be freed before pipelined copy finishes.
> v4:
>   - Rebase on newly added apply-to-pinned infra.
> 
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1182
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>

IMO a longer-term goal is to move eviction on suspend and hibernation
earlier, while swapout still works and, for hibernation, before the
size of the hibernation image is estimated. If we do that, would it be
easy to pre-populate the backup bos at that point, to avoid memory
allocations late in the process? This would likely be done from a PM
notifier, along the lines of the rough sketch below.
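
Something like the following is what I have in mind; just a sketch,
where xe_bo_backup_prepare_all_pinned() / xe_bo_backup_free_all_pinned()
and the xe->pm_notifier member are made-up names, and the prepare
helper would only do the xe_bo_create_locked() part of
xe_bo_evict_pinned(), leaving the actual copy to suspend time:

#include <linux/notifier.h>
#include <linux/suspend.h>

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	/* Assumes a new notifier_block member in struct xe_device. */
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/*
		 * Swapout still works at this point and, for hibernation,
		 * this runs before the image size is estimated, so the
		 * backup bo allocations are accounted for. Hypothetical
		 * helper: allocate bo->backup_obj for each pinned VRAM bo
		 * without copying anything yet.
		 */
		err = xe_bo_backup_prepare_all_pinned(xe);
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		/*
		 * Hypothetical helper: drop backups that were never
		 * consumed, e.g. after an aborted suspend.
		 */
		xe_bo_backup_free_all_pinned(xe);
		break;
	}

	return notifier_from_errno(err);
}

/* Registered once at probe time, e.g. from xe_pm_init(). */
int xe_pm_notifier_register(struct xe_device *xe)
{
	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	return register_pm_notifier(&xe->pm_notifier);
}

That way the backup allocations happen while shmem/swap is still
available and get included in the hibernation image size estimate, and
xe_bo_evict_pinned() would then only need to do the copy itself.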

/Thomas



> ---
>  drivers/gpu/drm/xe/xe_bo.c       | 339 ++++++++++++++++---------------
>  drivers/gpu/drm/xe/xe_bo_evict.c |   2 -
>  drivers/gpu/drm/xe/xe_bo_types.h |   2 +
>  3 files changed, 179 insertions(+), 164 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 64f9c936eea0..362f08f8e743 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -898,79 +898,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>  		xe_pm_runtime_get_noresume(xe);
>  	}
>  
> -	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
> -		/*
> -		 * Kernel memory that is pinned should only be moved on suspend
> -		 * / resume, some of the pinned memory is required for the
> -		 * device to resume / use the GPU to move other evicted memory
> -		 * (user memory) around. This likely could be optimized a bit
> -		 * further where we find the minimum set of pinned memory
> -		 * required for resume but for simplity doing a memcpy for all
> -		 * pinned memory.
> -		 */
> -		ret = xe_bo_vmap(bo);
> -		if (!ret) {
> -			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
> +	if (move_lacks_source) {
> +		u32 flags = 0;
>  
> -			/* Create a new VMAP once kernel BO back in VRAM */
> -			if (!ret && resource_is_vram(new_mem)) {
> -				struct xe_vram_region *vram = res_to_mem_region(new_mem);
> -				void __iomem *new_addr = vram->mapping +
> -					(new_mem->start << PAGE_SHIFT);
> +		if (mem_type_is_vram(new_mem->mem_type))
> +			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
> +		else if (handle_system_ccs)
> +			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
>  
> -				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
> -					ret = -EINVAL;
> -					xe_pm_runtime_put(xe);
> -					goto out;
> -				}
> -
> -				xe_assert(xe, new_mem->start ==
> -					  bo->placements->fpfn);
> -
> -				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
> -			}
> +		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
> +	} else {
> +		fence = xe_migrate_copy(migrate, bo, bo, old_mem,
> new_mem,
> +					handle_system_ccs);
> +	}
> +	if (IS_ERR(fence)) {
> +		ret = PTR_ERR(fence);
> +		xe_pm_runtime_put(xe);
> +		goto out;
> +	}
> +	if (!move_lacks_source) {
> +		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
> +						new_mem);
> +		if (ret) {
> +			dma_fence_wait(fence, false);
> +			ttm_bo_move_null(ttm_bo, new_mem);
> +			ret = 0;
>  		}
>  	} else {
> -		if (move_lacks_source) {
> -			u32 flags = 0;
> -
> -			if (mem_type_is_vram(new_mem->mem_type))
> -				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
> -			else if (handle_system_ccs)
> -				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
> -
> -			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
> -		}
> -		else
> -			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
> -						new_mem, handle_system_ccs);
> -		if (IS_ERR(fence)) {
> -			ret = PTR_ERR(fence);
> -			xe_pm_runtime_put(xe);
> -			goto out;
> -		}
> -		if (!move_lacks_source) {
> -			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
> -							true, new_mem);
> -			if (ret) {
> -				dma_fence_wait(fence, false);
> -				ttm_bo_move_null(ttm_bo, new_mem);
> -				ret = 0;
> -			}
> -		} else {
> -			/*
> -			 * ttm_bo_move_accel_cleanup() may blow up if
> -			 * bo->resource == NULL, so just attach the
> -			 * fence and set the new resource.
> -			 */
> -			dma_resv_add_fence(ttm_bo->base.resv, fence,
> -					   DMA_RESV_USAGE_KERNEL);
> -			ttm_bo_move_null(ttm_bo, new_mem);
> -		}
> -
> -		dma_fence_put(fence);
> +		/*
> +		 * ttm_bo_move_accel_cleanup() may blow up if
> +		 * bo->resource == NULL, so just attach the
> +		 * fence and set the new resource.
> +		 */
> +		dma_resv_add_fence(ttm_bo->base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
> +		ttm_bo_move_null(ttm_bo, new_mem);
>  	}
>  
> +	dma_fence_put(fence);
>  	xe_pm_runtime_put(xe);
>  
>  out:
> @@ -1107,59 +1072,90 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
>   */
>  int xe_bo_evict_pinned(struct xe_bo *bo)
>  {
> -	struct ttm_place place = {
> -		.mem_type = XE_PL_TT,
> -	};
> -	struct ttm_placement placement = {
> -		.placement = &place,
> -		.num_placement = 1,
> -	};
> -	struct ttm_operation_ctx ctx = {
> -		.interruptible = false,
> -		.gfp_retry_mayfail = true,
> -	};
> -	struct ttm_resource *new_mem;
> -	int ret;
> +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
> +	struct xe_bo *backup;
> +	bool unmap = false;
> +	int ret = 0;
>  
> -	xe_bo_assert_held(bo);
> +	xe_bo_lock(bo, false);
>  
> -	if (WARN_ON(!bo->ttm.resource))
> -		return -EINVAL;
> -
> -	if (WARN_ON(!xe_bo_is_pinned(bo)))
> -		return -EINVAL;
> -
> -	if (!xe_bo_is_vram(bo))
> -		return 0;
> -
> -	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
> -	if (ret)
> -		return ret;
> -
> -	if (!bo->ttm.ttm) {
> -		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
> -		if (!bo->ttm.ttm) {
> -			ret = -ENOMEM;
> -			goto err_res_free;
> -		}
> +	if (WARN_ON(!bo->ttm.resource)) {
> +		ret = -EINVAL;
> +		goto out_unlock_bo;
>  	}
>  
> -	ret = ttm_bo_populate(&bo->ttm, &ctx);
> +	if (WARN_ON(!xe_bo_is_pinned(bo))) {
> +		ret = -EINVAL;
> +		goto out_unlock_bo;
> +	}
> +
> +	if (!xe_bo_is_vram(bo))
> +		goto out_unlock_bo;
> +
> +	backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
> +				     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
> +				     XE_BO_FLAG_PINNED);
> +	if (IS_ERR(backup)) {
> +		ret = PTR_ERR(backup);
> +		goto out_unlock_bo;
> +	}
> +
> +	if (xe_bo_is_user(bo)) {
> +		struct xe_migrate *migrate;
> +		struct dma_fence *fence;
> +
> +		if (bo->tile)
> +			migrate = bo->tile->migrate;
> +		else
> +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
> +
> +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> +		if (ret)
> +			goto out_backup;
> +
> +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
> +		if (ret)
> +			goto out_backup;
> +
> +		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
> +					backup->ttm.resource, false);
> +		if (IS_ERR(fence)) {
> +			ret = PTR_ERR(fence);
> +			goto out_backup;
> +		}
> +
> +		dma_resv_add_fence(bo->ttm.base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
> +		dma_resv_add_fence(backup->ttm.base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
> +		dma_fence_put(fence);
> +	} else {
> +		ret = xe_bo_vmap(backup);
> +		if (ret)
> +			goto out_backup;
> +
> +		if (iosys_map_is_null(&bo->vmap)) {
> +			ret = xe_bo_vmap(bo);
> +			if (ret)
> +				goto out_backup;
> +			unmap = true;
> +		}
> +
> +		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
> +				   bo->size);
> +	}
> +
> +	bo->backup_obj = backup;
> +
> +out_backup:
> +	xe_bo_vunmap(backup);
> +	xe_bo_unlock(backup);
>  	if (ret)
> -		goto err_res_free;
> -
> -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> -	if (ret)
> -		goto err_res_free;
> -
> -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> -	if (ret)
> -		goto err_res_free;
> -
> -	return 0;
> -
> -err_res_free:
> -	ttm_resource_free(&bo->ttm, &new_mem);
> +		xe_bo_put(backup);
> +out_unlock_bo:
> +	if (unmap)
> +		xe_bo_vunmap(bo);
> +	xe_bo_unlock(bo);
>  	return ret;
>  }
>  
> @@ -1180,47 +1176,82 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
>  		.interruptible = false,
>  		.gfp_retry_mayfail = false,
>  	};
> -	struct ttm_resource *new_mem;
> -	struct ttm_place *place = &bo->placements[0];
> +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
> +	struct xe_bo *backup = bo->backup_obj;
> +	bool unmap = false;
>  	int ret;
>  
> -	xe_bo_assert_held(bo);
> -
> -	if (WARN_ON(!bo->ttm.resource))
> -		return -EINVAL;
> -
> -	if (WARN_ON(!xe_bo_is_pinned(bo)))
> -		return -EINVAL;
> -
> -	if (WARN_ON(xe_bo_is_vram(bo)))
> -		return -EINVAL;
> -
> -	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
> -		return -EINVAL;
> -
> -	if (!mem_type_is_vram(place->mem_type))
> +	if (!backup)
>  		return 0;
>  
> -	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
> +	xe_bo_lock(backup, false);
> +
> +	ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
>  	if (ret)
> -		return ret;
> +		goto out_backup;
>  
> -	ret = ttm_bo_populate(&bo->ttm, &ctx);
> -	if (ret)
> -		goto err_res_free;
> +	if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
> +		ret = -EBUSY;
> +		goto out_backup;
> +	}
>  
> -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> -	if (ret)
> -		goto err_res_free;
> +	if (xe_bo_is_user(bo)) {
> +		struct xe_migrate *migrate;
> +		struct dma_fence *fence;
>  
> -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> -	if (ret)
> -		goto err_res_free;
> +		if (bo->tile)
> +			migrate = bo->tile->migrate;
> +		else
> +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
>  
> -	return 0;
> +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> +		if (ret)
> +			goto out_unlock_bo;
>  
> -err_res_free:
> -	ttm_resource_free(&bo->ttm, &new_mem);
> +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
> +		if (ret)
> +			goto out_unlock_bo;
> +
> +		fence = xe_migrate_copy(migrate, backup, bo,
> +					backup->ttm.resource, bo->ttm.resource,
> +					false);
> +		if (IS_ERR(fence)) {
> +			ret = PTR_ERR(fence);
> +			goto out_unlock_bo;
> +		}
> +
> +		dma_resv_add_fence(bo->ttm.base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
> +		dma_resv_add_fence(backup->ttm.base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
> +		dma_fence_put(fence);
> +	} else {
> +		ret = xe_bo_vmap(backup);
> +		if (ret)
> +			goto out_unlock_bo;
> +
> +		if (iosys_map_is_null(&bo->vmap)) {
> +			ret = xe_bo_vmap(bo);
> +			if (ret)
> +				goto out_unlock_bo;
> +			unmap = true;
> +		}
> +
> +		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
> +				 bo->size);
> +	}
> +
> +	bo->backup_obj = NULL;
> +
> +out_unlock_bo:
> +	if (unmap)
> +		xe_bo_vunmap(bo);
> +	xe_bo_unlock(bo);
> +out_backup:
> +	xe_bo_vunmap(backup);
> +	xe_bo_unlock(backup);
> +	if (!bo->backup_obj)
> +		xe_bo_put(backup);
>  	return ret;
>  }
>  
> @@ -2149,22 +2180,6 @@ int xe_bo_pin(struct xe_bo *bo)
>  	if (err)
>  		return err;
>  
> -	/*
> -	 * For pinned objects in on DGFX, which are also in vram, we expect
> -	 * these to be in contiguous VRAM memory. Required eviction / restore
> -	 * during suspend / resume (force restore to same physical address).
> -	 */
> -	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
> -	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
> -		if (mem_type_is_vram(place->mem_type)) {
> -			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
> -
> -			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
> -				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
> -			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
> -		}
> -	}
> -
>  	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
>  		spin_lock(&xe->pinned.lock);
>  		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
> index 1eeb3910450b..6e6a5d7a5617 100644
> --- a/drivers/gpu/drm/xe/xe_bo_evict.c
> +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
> @@ -31,14 +31,12 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
>  		list_move_tail(&bo->pinned_link, &still_in_list);
>  		spin_unlock(&xe->pinned.lock);
>  
> -		xe_bo_lock(bo, false);
>  		ret = pinned_fn(bo);
>  		if (ret && pinned_list != new_list) {
>  			spin_lock(&xe->pinned.lock);
>  			list_move(&bo->pinned_link, pinned_list);
>  			spin_unlock(&xe->pinned.lock);
>  		}
> -		xe_bo_unlock(bo);
>  		xe_bo_put(bo);
>  		spin_lock(&xe->pinned.lock);
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 15a92e3d4898..81396181aaea 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -28,6 +28,8 @@ struct xe_vm;
>  struct xe_bo {
>  	/** @ttm: TTM base buffer object */
>  	struct ttm_buffer_object ttm;
> +	/** @backup_obj: The backup object when pinned and suspended (vram only) */
> +	struct xe_bo *backup_obj;
>  	/** @size: Size of this buffer object */
>  	size_t size;
>  	/** @flags: flags for this buffer object */


