[PATCH v4 1/7] drm/xe: use backup object for pinned save/restore

Thomas Hellström thomas.hellstrom at linux.intel.com
Mon Mar 31 15:07:21 UTC 2025


Hi,

On Mon, 2025-03-31 at 13:31 +0100, Matthew Auld wrote:
> Hi,
> 
> On 28/03/2025 15:28, Thomas Hellström wrote:
> > Hi, Matthew, a partly unrelated question below.
> > 
> > On Wed, 2025-03-26 at 18:19 +0000, Matthew Auld wrote:
> > > Currently we move pinned objects, relying on the fact that the
> > > lpfn/fpfn will force the placement to occupy the same pages when
> > > restoring. However this then limits all such pinned objects to be
> > > contiguous underneath. In addition, moving pinned objects in the
> > > first place is likely a little fragile. Rather than moving such
> > > objects, copy the page contents to a secondary system memory
> > > object; that way the VRAM pages never move and remain pinned. This
> > > also opens the door for eventually having non-contiguous pinned
> > > objects that can also be saved/restored using the blitter.
> > > 
> > > v2:
> > >   - Make sure to drop the fence ref.
> > >   - Handle NULL bo->migrate.
> > > v3:
> > >   - Ensure we add the copy fence to the BOs, otherwise backup_obj can
> > >     be freed before the pipelined copy finishes.
> > > v4:
> > >   - Rebase on newly added apply-to-pinned infra.
> > > 
> > > Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1182
> > > Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> > > Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> > > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > Cc: Matthew Brost <matthew.brost at intel.com>
> > > Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> > 
> > IMO a longer-term goal is to move eviction on suspend and hibernation
> > earlier, while swapout still works and, for hibernation, before
> > estimating the size of the hibernation image. If we do that, would it
> > be easy to pre-populate the backup BOs at that point, to avoid memory
> > allocations late in the process? This would likely be done in a PM
> > notifier.
> 
> Yeah, I think this makes sense. So maybe something roughly like this
> (on top of this series):
> 
> https://gitlab.freedesktop.org/mwa/kernel/-/commit/16bc0659764513088072f47c21804985b5e0d86d
> 
> So have evict-all-user and prepare-pinned called from the notifier for
> suspend. Not sure about returning NOTIFY_BAD, or whether we should
> ignore failures until the real .suspend() for that prepare-pinned
> case. Or whether we should keep the backup object pinned until
> .suspend(). Also not sure whether RPM also calls this notifier or not.

Yes, exactly something like this.

We need to sort out the above, and perhaps also the hibernation
sequence, so that we ensure this happens before the size calculation of
the hibernation image. I also have a vague recollection that the
notifiers should be symmetric, in the sense that everything done
pre-hibernation needs to be undone in a notifier post-hibernation, in
case the hibernation itself fails.
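
For reference, a rough sketch of the kind of symmetric notifier I mean
(the xe_bo_* helpers and the pm_notifier member below are illustrative
placeholders, not the names from your branch):

#include <linux/notifier.h>
#include <linux/suspend.h>

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	/* Assumes a struct notifier_block pm_notifier member in xe_device. */
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/*
		 * Evict user BOs and pre-populate the pinned backup BOs
		 * while swapout still works and, for hibernation, before
		 * the image size is estimated.
		 */
		err = xe_bo_evict_all_user(xe);		/* placeholder */
		if (!err)
			err = xe_bo_prepare_pinned(xe);	/* placeholder */
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		/*
		 * Symmetric undo: this also runs if the suspend or
		 * hibernation attempt failed, so everything done in the
		 * prepare step above must be reversible here.
		 */
		xe_bo_unprepare_pinned(xe);		/* placeholder */
		break;
	}

	return notifier_from_errno(err);
}

/*
 * Registered once at driver init:
 *	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
 *	register_pm_notifier(&xe->pm_notifier);
 */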

/Thomas


> 
> > 
> > /Thomas
> > 
> > 
> > 
> > > ---
> > >   drivers/gpu/drm/xe/xe_bo.c       | 339 ++++++++++++++++---------------
> > >   drivers/gpu/drm/xe/xe_bo_evict.c |   2 -
> > >   drivers/gpu/drm/xe/xe_bo_types.h |   2 +
> > >   3 files changed, 179 insertions(+), 164 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > > index 64f9c936eea0..362f08f8e743 100644
> > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > @@ -898,79 +898,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> > >   		xe_pm_runtime_get_noresume(xe);
> > >   	}
> > >   
> > > -	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
> > > -		/*
> > > -		 * Kernel memory that is pinned should only be moved on suspend
> > > -		 * / resume, some of the pinned memory is required for the
> > > -		 * device to resume / use the GPU to move other evicted memory
> > > -		 * (user memory) around. This likely could be optimized a bit
> > > -		 * further where we find the minimum set of pinned memory
> > > -		 * required for resume but for simplity doing a memcpy for all
> > > -		 * pinned memory.
> > > -		 */
> > > -		ret = xe_bo_vmap(bo);
> > > -		if (!ret) {
> > > -			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
> > > +	if (move_lacks_source) {
> > > +		u32 flags = 0;
> > >   
> > > -			/* Create a new VMAP once kernel BO back in VRAM */
> > > -			if (!ret && resource_is_vram(new_mem)) {
> > > -				struct xe_vram_region *vram = res_to_mem_region(new_mem);
> > > -				void __iomem *new_addr = vram->mapping +
> > > -					(new_mem->start << PAGE_SHIFT);
> > > +		if (mem_type_is_vram(new_mem->mem_type))
> > > +			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
> > > +		else if (handle_system_ccs)
> > > +			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
> > >   
> > > -				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
> > > -					ret = -EINVAL;
> > > -					xe_pm_runtime_put(xe);
> > > -					goto out;
> > > -				}
> > > -
> > > -				xe_assert(xe, new_mem->start ==
> > > -					  bo->placements->fpfn);
> > > -
> > > -				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
> > > -			}
> > > +		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
> > > +	} else {
> > > +		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
> > > +					handle_system_ccs);
> > > +	}
> > > +	if (IS_ERR(fence)) {
> > > +		ret = PTR_ERR(fence);
> > > +		xe_pm_runtime_put(xe);
> > > +		goto out;
> > > +	}
> > > +	if (!move_lacks_source) {
> > > +		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
> > > +						new_mem);
> > > +		if (ret) {
> > > +			dma_fence_wait(fence, false);
> > > +			ttm_bo_move_null(ttm_bo, new_mem);
> > > +			ret = 0;
> > >   		}
> > >   	} else {
> > > -		if (move_lacks_source) {
> > > -			u32 flags = 0;
> > > -
> > > -			if (mem_type_is_vram(new_mem->mem_type))
> > > -				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
> > > -			else if (handle_system_ccs)
> > > -				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
> > > -
> > > -			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
> > > -		}
> > > -		else
> > > -			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
> > > -						new_mem, handle_system_ccs);
> > > -		if (IS_ERR(fence)) {
> > > -			ret = PTR_ERR(fence);
> > > -			xe_pm_runtime_put(xe);
> > > -			goto out;
> > > -		}
> > > -		if (!move_lacks_source) {
> > > -			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
> > > -							true, new_mem);
> > > -			if (ret) {
> > > -				dma_fence_wait(fence, false);
> > > -				ttm_bo_move_null(ttm_bo, new_mem);
> > > -				ret = 0;
> > > -			}
> > > -		} else {
> > > -			/*
> > > -			 * ttm_bo_move_accel_cleanup() may blow up if
> > > -			 * bo->resource == NULL, so just attach the
> > > -			 * fence and set the new resource.
> > > -			 */
> > > -			dma_resv_add_fence(ttm_bo->base.resv, fence,
> > > -					   DMA_RESV_USAGE_KERNEL);
> > > -			ttm_bo_move_null(ttm_bo, new_mem);
> > > -		}
> > > -
> > > -		dma_fence_put(fence);
> > > +		/*
> > > +		 * ttm_bo_move_accel_cleanup() may blow up if
> > > +		 * bo->resource == NULL, so just attach the
> > > +		 * fence and set the new resource.
> > > +		 */
> > > +		dma_resv_add_fence(ttm_bo->base.resv, fence,
> > > +				   DMA_RESV_USAGE_KERNEL);
> > > +		ttm_bo_move_null(ttm_bo, new_mem);
> > >   	}
> > >   
> > > +	dma_fence_put(fence);
> > >   	xe_pm_runtime_put(xe);
> > >   
> > >   out:
> > > @@ -1107,59 +1072,90 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
> > >    */
> > >   int xe_bo_evict_pinned(struct xe_bo *bo)
> > >   {
> > > -	struct ttm_place place = {
> > > -		.mem_type = XE_PL_TT,
> > > -	};
> > > -	struct ttm_placement placement = {
> > > -		.placement = &place,
> > > -		.num_placement = 1,
> > > -	};
> > > -	struct ttm_operation_ctx ctx = {
> > > -		.interruptible = false,
> > > -		.gfp_retry_mayfail = true,
> > > -	};
> > > -	struct ttm_resource *new_mem;
> > > -	int ret;
> > > +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
> > > +	struct xe_bo *backup;
> > > +	bool unmap = false;
> > > +	int ret = 0;
> > >   
> > > -	xe_bo_assert_held(bo);
> > > +	xe_bo_lock(bo, false);
> > >   
> > > -	if (WARN_ON(!bo->ttm.resource))
> > > -		return -EINVAL;
> > > -
> > > -	if (WARN_ON(!xe_bo_is_pinned(bo)))
> > > -		return -EINVAL;
> > > -
> > > -	if (!xe_bo_is_vram(bo))
> > > -		return 0;
> > > -
> > > -	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
> > > -	if (ret)
> > > -		return ret;
> > > -
> > > -	if (!bo->ttm.ttm) {
> > > -		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
> > > -		if (!bo->ttm.ttm) {
> > > -			ret = -ENOMEM;
> > > -			goto err_res_free;
> > > -		}
> > > +	if (WARN_ON(!bo->ttm.resource)) {
> > > +		ret = -EINVAL;
> > > +		goto out_unlock_bo;
> > >   	}
> > >   
> > > -	ret = ttm_bo_populate(&bo->ttm, &ctx);
> > > +	if (WARN_ON(!xe_bo_is_pinned(bo))) {
> > > +		ret = -EINVAL;
> > > +		goto out_unlock_bo;
> > > +	}
> > > +
> > > +	if (!xe_bo_is_vram(bo))
> > > +		goto out_unlock_bo;
> > > +
> > > +	backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
> > > +				     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
> > > +				     XE_BO_FLAG_PINNED);
> > > +	if (IS_ERR(backup)) {
> > > +		ret = PTR_ERR(backup);
> > > +		goto out_unlock_bo;
> > > +	}
> > > +
> > > +	if (xe_bo_is_user(bo)) {
> > > +		struct xe_migrate *migrate;
> > > +		struct dma_fence *fence;
> > > +
> > > +		if (bo->tile)
> > > +			migrate = bo->tile->migrate;
> > > +		else
> > > +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
> > > +
> > > +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> > > +		if (ret)
> > > +			goto out_backup;
> > > +
> > > +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
> > > +		if (ret)
> > > +			goto out_backup;
> > > +
> > > +		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
> > > +					backup->ttm.resource, false);
> > > +		if (IS_ERR(fence)) {
> > > +			ret = PTR_ERR(fence);
> > > +			goto out_backup;
> > > +		}
> > > +
> > > +		dma_resv_add_fence(bo->ttm.base.resv, fence,
> > > +				   DMA_RESV_USAGE_KERNEL);
> > > +		dma_resv_add_fence(backup->ttm.base.resv, fence,
> > > +				   DMA_RESV_USAGE_KERNEL);
> > > +		dma_fence_put(fence);
> > > +	} else {
> > > +		ret = xe_bo_vmap(backup);
> > > +		if (ret)
> > > +			goto out_backup;
> > > +
> > > +		if (iosys_map_is_null(&bo->vmap)) {
> > > +			ret = xe_bo_vmap(bo);
> > > +			if (ret)
> > > +				goto out_backup;
> > > +			unmap = true;
> > > +		}
> > > +
> > > +		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
> > > +				   bo->size);
> > > +	}
> > > +
> > > +	bo->backup_obj = backup;
> > > +
> > > +out_backup:
> > > +	xe_bo_vunmap(backup);
> > > +	xe_bo_unlock(backup);
> > >   	if (ret)
> > > -		goto err_res_free;
> > > -
> > > -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> > > -	if (ret)
> > > -		goto err_res_free;
> > > -
> > > -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> > > -	if (ret)
> > > -		goto err_res_free;
> > > -
> > > -	return 0;
> > > -
> > > -err_res_free:
> > > -	ttm_resource_free(&bo->ttm, &new_mem);
> > > +		xe_bo_put(backup);
> > > +out_unlock_bo:
> > > +	if (unmap)
> > > +		xe_bo_vunmap(bo);
> > > +	xe_bo_unlock(bo);
> > >   	return ret;
> > >   }
> > >   
> > > @@ -1180,47 +1176,82 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
> > >   		.interruptible = false,
> > >   		.gfp_retry_mayfail = false,
> > >   	};
> > > -	struct ttm_resource *new_mem;
> > > -	struct ttm_place *place = &bo->placements[0];
> > > +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
> > > +	struct xe_bo *backup = bo->backup_obj;
> > > +	bool unmap = false;
> > >   	int ret;
> > >   
> > > -	xe_bo_assert_held(bo);
> > > -
> > > -	if (WARN_ON(!bo->ttm.resource))
> > > -		return -EINVAL;
> > > -
> > > -	if (WARN_ON(!xe_bo_is_pinned(bo)))
> > > -		return -EINVAL;
> > > -
> > > -	if (WARN_ON(xe_bo_is_vram(bo)))
> > > -		return -EINVAL;
> > > -
> > > -	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
> > > -		return -EINVAL;
> > > -
> > > -	if (!mem_type_is_vram(place->mem_type))
> > > +	if (!backup)
> > >   		return 0;
> > >   
> > > -	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
> > > +	xe_bo_lock(backup, false);
> > > +
> > > +	ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
> > >   	if (ret)
> > > -		return ret;
> > > +		goto out_backup;
> > >   
> > > -	ret = ttm_bo_populate(&bo->ttm, &ctx);
> > > -	if (ret)
> > > -		goto err_res_free;
> > > +	if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
> > > +		ret = -EBUSY;
> > > +		goto out_backup;
> > > +	}
> > >   
> > > -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> > > -	if (ret)
> > > -		goto err_res_free;
> > > +	if (xe_bo_is_user(bo)) {
> > > +		struct xe_migrate *migrate;
> > > +		struct dma_fence *fence;
> > >   
> > > -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
> > > -	if (ret)
> > > -		goto err_res_free;
> > > +		if (bo->tile)
> > > +			migrate = bo->tile->migrate;
> > > +		else
> > > +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
> > >   
> > > -	return 0;
> > > +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
> > > +		if (ret)
> > > +			goto out_unlock_bo;
> > >   
> > > -err_res_free:
> > > -	ttm_resource_free(&bo->ttm, &new_mem);
> > > +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
> > > +		if (ret)
> > > +			goto out_unlock_bo;
> > > +
> > > +		fence = xe_migrate_copy(migrate, backup, bo,
> > > +					backup->ttm.resource, bo->ttm.resource,
> > > +					false);
> > > +		if (IS_ERR(fence)) {
> > > +			ret = PTR_ERR(fence);
> > > +			goto out_unlock_bo;
> > > +		}
> > > +
> > > +		dma_resv_add_fence(bo->ttm.base.resv, fence,
> > > +				   DMA_RESV_USAGE_KERNEL);
> > > +		dma_resv_add_fence(backup->ttm.base.resv, fence,
> > > +				   DMA_RESV_USAGE_KERNEL);
> > > +		dma_fence_put(fence);
> > > +	} else {
> > > +		ret = xe_bo_vmap(backup);
> > > +		if (ret)
> > > +			goto out_unlock_bo;
> > > +
> > > +		if (iosys_map_is_null(&bo->vmap)) {
> > > +			ret = xe_bo_vmap(bo);
> > > +			if (ret)
> > > +				goto out_unlock_bo;
> > > +			unmap = true;
> > > +		}
> > > +
> > > +		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
> > > +				 bo->size);
> > > +	}
> > > +
> > > +	bo->backup_obj = NULL;
> > > +
> > > +out_unlock_bo:
> > > +	if (unmap)
> > > +		xe_bo_vunmap(bo);
> > > +	xe_bo_unlock(bo);
> > > +out_backup:
> > > +	xe_bo_vunmap(backup);
> > > +	xe_bo_unlock(backup);
> > > +	if (!bo->backup_obj)
> > > +		xe_bo_put(backup);
> > >   	return ret;
> > >   }
> > >   
> > > @@ -2149,22 +2180,6 @@ int xe_bo_pin(struct xe_bo *bo)
> > >   	if (err)
> > >   		return err;
> > >   
> > > -	/*
> > > -	 * For pinned objects in on DGFX, which are also in vram, we expect
> > > -	 * these to be in contiguous VRAM memory. Required eviction / restore
> > > -	 * during suspend / resume (force restore to same physical address).
> > > -	 */
> > > -	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
> > > -	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
> > > -		if (mem_type_is_vram(place->mem_type)) {
> > > -			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
> > > -
> > > -			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
> > > -				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
> > > -			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
> > > -		}
> > > -	}
> > > -
> > >   	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
> > >   		spin_lock(&xe->pinned.lock);
> > >   		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
> > > diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
> > > index 1eeb3910450b..6e6a5d7a5617 100644
> > > --- a/drivers/gpu/drm/xe/xe_bo_evict.c
> > > +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
> > > @@ -31,14 +31,12 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
> > >   		list_move_tail(&bo->pinned_link, &still_in_list);
> > >   		spin_unlock(&xe->pinned.lock);
> > >   
> > > -		xe_bo_lock(bo, false);
> > >   		ret = pinned_fn(bo);
> > >   		if (ret && pinned_list != new_list) {
> > >   			spin_lock(&xe->pinned.lock);
> > >   			list_move(&bo->pinned_link, pinned_list);
> > >   			spin_unlock(&xe->pinned.lock);
> > >   		}
> > > -		xe_bo_unlock(bo);
> > >   		xe_bo_put(bo);
> > >   		spin_lock(&xe->pinned.lock);
> > >   	}
> > > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> > > index 15a92e3d4898..81396181aaea 100644
> > > --- a/drivers/gpu/drm/xe/xe_bo_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> > > @@ -28,6 +28,8 @@ struct xe_vm;
> > >   struct xe_bo {
> > >   	/** @ttm: TTM base buffer object */
> > >   	struct ttm_buffer_object ttm;
> > > +	/** @backup_obj: The backup object when pinned and suspended (vram only) */
> > > +	struct xe_bo *backup_obj;
> > >   	/** @size: Size of this buffer object */
> > >   	size_t size;
> > >   	/** @flags: flags for this buffer object */
> > 
> 


