[PATCH v4 1/7] drm/xe: use backup object for pinned save/restore

Matthew Auld matthew.auld at intel.com
Mon Mar 31 12:31:05 UTC 2025


Hi,

On 28/03/2025 15:28, Thomas Hellström wrote:
> Hi, Matthew, a partly unrelated question below.
> 
> On Wed, 2025-03-26 at 18:19 +0000, Matthew Auld wrote:
>> Currently we move pinned objects, relying on the fact that the
>> lpfn/fpfn will force the placement to occupy the same pages when
>> restoring. However, this then limits all such pinned objects to be
>> contiguous underneath. In addition, moving pinned objects in the
>> first place is likely a little fragile. Rather than moving such
>> objects, copy the page contents to a secondary system memory object;
>> that way the VRAM pages never move and remain pinned. This also opens
>> the door for eventually having non-contiguous pinned objects that can
>> also be saved/restored using the blitter.
>>
>> v2:
>>   - Make sure to drop the fence ref.
>>   - Handle NULL bo->migrate.
>> v3:
>>   - Ensure we add the copy fence to the BOs, otherwise backup_obj can
>>     be freed before pipelined copy finishes.
>> v4:
>>   - Rebase on newly added apply-to-pinned infra.
>>
>> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1182
>> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
>> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> 
> IMO a longer term goal is to move eviction on suspend and hibernation
> earlier, while swapout still works and, for hibernation, before
> estimating the size of the hibernation image. If we do that, would it
> be easy to pre-populate the backup bos at that point, to avoid memory
> allocations late in the process? This would likely be in a PM notifier.

Yeah, I think this makes sense. So maybe something roughly like this (on 
top of this series):

https://gitlab.freedesktop.org/mwa/kernel/-/commit/16bc0659764513088072f47c21804985b5e0d86d

So have evict-all-user and prepare-pinned called from the notifier on 
suspend. Not sure about returning NOTIFY_BAD, or whether we should 
ignore failures until the real .suspend() for the prepare-pinned case. 
Or whether we should keep the backup object pinned until .suspend(). 
Also not sure whether RPM calls this notifier or not.
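
For reference, a rough and untested sketch of what that notifier 
hook-up could look like. xe_bo_evict_all_user(), 
xe_bo_notifier_prepare_all_pinned() and the xe_device.pm_notifier 
member are placeholder names borrowed from the WIP commit above, not a 
final interface:

#include <linux/notifier.h>
#include <linux/suspend.h>

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/*
		 * Evict user BOs and pre-populate the pinned backup
		 * objects while swapout still works, i.e. before the
		 * hibernation image size is estimated.
		 */
		err = xe_bo_evict_all_user(xe);
		if (!err)
			err = xe_bo_notifier_prepare_all_pinned(xe);
		break;
	}

	/*
	 * Open question from above: return NOTIFY_BAD on failure, or
	 * ignore here and let the real .suspend() callback deal with it?
	 */
	return err ? NOTIFY_BAD : NOTIFY_DONE;
}

int xe_pm_register_notifier(struct xe_device *xe)
{
	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	return register_pm_notifier(&xe->pm_notifier);
}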

> 
> /Thomas
> 
> 
> 
>> ---
>>   drivers/gpu/drm/xe/xe_bo.c       | 339 ++++++++++++++++---------------
>>   drivers/gpu/drm/xe/xe_bo_evict.c |   2 -
>>   drivers/gpu/drm/xe/xe_bo_types.h |   2 +
>>   3 files changed, 179 insertions(+), 164 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index 64f9c936eea0..362f08f8e743 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -898,79 +898,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>>   		xe_pm_runtime_get_noresume(xe);
>>   	}
>>   
>> -	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
>> -		/*
>> -		 * Kernel memory that is pinned should only be moved on suspend
>> -		 * / resume, some of the pinned memory is required for the
>> -		 * device to resume / use the GPU to move other evicted memory
>> -		 * (user memory) around. This likely could be optimized a bit
>> -		 * further where we find the minimum set of pinned memory
>> -		 * required for resume but for simplity doing a memcpy for all
>> -		 * pinned memory.
>> -		 */
>> -		ret = xe_bo_vmap(bo);
>> -		if (!ret) {
>> -			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
>> +	if (move_lacks_source) {
>> +		u32 flags = 0;
>>   
>> -			/* Create a new VMAP once kernel BO back in VRAM */
>> -			if (!ret && resource_is_vram(new_mem)) {
>> -				struct xe_vram_region *vram = res_to_mem_region(new_mem);
>> -				void __iomem *new_addr = vram->mapping +
>> -					(new_mem->start << PAGE_SHIFT);
>> +		if (mem_type_is_vram(new_mem->mem_type))
>> +			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
>> +		else if (handle_system_ccs)
>> +			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
>>   
>> -				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
>> -					ret = -EINVAL;
>> -					xe_pm_runtime_put(xe);
>> -					goto out;
>> -				}
>> -
>> -				xe_assert(xe, new_mem->start ==
>> -					  bo->placements->fpfn);
>> -
>> -				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
>> -			}
>> +		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
>> +	} else {
>> +		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
>> +					handle_system_ccs);
>> +	}
>> +	if (IS_ERR(fence)) {
>> +		ret = PTR_ERR(fence);
>> +		xe_pm_runtime_put(xe);
>> +		goto out;
>> +	}
>> +	if (!move_lacks_source) {
>> +		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
>> +						new_mem);
>> +		if (ret) {
>> +			dma_fence_wait(fence, false);
>> +			ttm_bo_move_null(ttm_bo, new_mem);
>> +			ret = 0;
>>   		}
>>   	} else {
>> -		if (move_lacks_source) {
>> -			u32 flags = 0;
>> -
>> -			if (mem_type_is_vram(new_mem->mem_type))
>> -				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
>> -			else if (handle_system_ccs)
>> -				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
>> -
>> -			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
>> -		}
>> -		else
>> -			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
>> -						new_mem, handle_system_ccs);
>> -		if (IS_ERR(fence)) {
>> -			ret = PTR_ERR(fence);
>> -			xe_pm_runtime_put(xe);
>> -			goto out;
>> -		}
>> -		if (!move_lacks_source) {
>> -			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
>> -							true, new_mem);
>> -			if (ret) {
>> -				dma_fence_wait(fence, false);
>> -				ttm_bo_move_null(ttm_bo, new_mem);
>> -				ret = 0;
>> -			}
>> -		} else {
>> -			/*
>> -			 * ttm_bo_move_accel_cleanup() may blow up if
>> -			 * bo->resource == NULL, so just attach the
>> -			 * fence and set the new resource.
>> -			 */
>> -			dma_resv_add_fence(ttm_bo->base.resv, fence,
>> -					   DMA_RESV_USAGE_KERNEL);
>> -			ttm_bo_move_null(ttm_bo, new_mem);
>> -		}
>> -
>> -		dma_fence_put(fence);
>> +		/*
>> +		 * ttm_bo_move_accel_cleanup() may blow up if
>> +		 * bo->resource == NULL, so just attach the
>> +		 * fence and set the new resource.
>> +		 */
>> +		dma_resv_add_fence(ttm_bo->base.resv, fence,
>> +				   DMA_RESV_USAGE_KERNEL);
>> +		ttm_bo_move_null(ttm_bo, new_mem);
>>   	}
>>   
>> +	dma_fence_put(fence);
>>   	xe_pm_runtime_put(xe);
>>   
>>   out:
>> @@ -1107,59 +1072,90 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
>>    */
>>   int xe_bo_evict_pinned(struct xe_bo *bo)
>>   {
>> -	struct ttm_place place = {
>> -		.mem_type = XE_PL_TT,
>> -	};
>> -	struct ttm_placement placement = {
>> -		.placement = &place,
>> -		.num_placement = 1,
>> -	};
>> -	struct ttm_operation_ctx ctx = {
>> -		.interruptible = false,
>> -		.gfp_retry_mayfail = true,
>> -	};
>> -	struct ttm_resource *new_mem;
>> -	int ret;
>> +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
>> +	struct xe_bo *backup;
>> +	bool unmap = false;
>> +	int ret = 0;
>>   
>> -	xe_bo_assert_held(bo);
>> +	xe_bo_lock(bo, false);
>>   
>> -	if (WARN_ON(!bo->ttm.resource))
>> -		return -EINVAL;
>> -
>> -	if (WARN_ON(!xe_bo_is_pinned(bo)))
>> -		return -EINVAL;
>> -
>> -	if (!xe_bo_is_vram(bo))
>> -		return 0;
>> -
>> -	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
>> -	if (ret)
>> -		return ret;
>> -
>> -	if (!bo->ttm.ttm) {
>> -		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
>> -		if (!bo->ttm.ttm) {
>> -			ret = -ENOMEM;
>> -			goto err_res_free;
>> -		}
>> +	if (WARN_ON(!bo->ttm.resource)) {
>> +		ret = -EINVAL;
>> +		goto out_unlock_bo;
>>   	}
>>   
>> -	ret = ttm_bo_populate(&bo->ttm, &ctx);
>> +	if (WARN_ON(!xe_bo_is_pinned(bo))) {
>> +		ret = -EINVAL;
>> +		goto out_unlock_bo;
>> +	}
>> +
>> +	if (!xe_bo_is_vram(bo))
>> +		goto out_unlock_bo;
>> +
>> +	backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
>> +				     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
>> +				     XE_BO_FLAG_PINNED);
>> +	if (IS_ERR(backup)) {
>> +		ret = PTR_ERR(backup);
>> +		goto out_unlock_bo;
>> +	}
>> +
>> +	if (xe_bo_is_user(bo)) {
>> +		struct xe_migrate *migrate;
>> +		struct dma_fence *fence;
>> +
>> +		if (bo->tile)
>> +			migrate = bo->tile->migrate;
>> +		else
>> +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
>> +
>> +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
>> +		if (ret)
>> +			goto out_backup;
>> +
>> +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
>> +		if (ret)
>> +			goto out_backup;
>> +
>> +		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
>> +					backup->ttm.resource, false);
>> +		if (IS_ERR(fence)) {
>> +			ret = PTR_ERR(fence);
>> +			goto out_backup;
>> +		}
>> +
>> +		dma_resv_add_fence(bo->ttm.base.resv, fence,
>> +				   DMA_RESV_USAGE_KERNEL);
>> +		dma_resv_add_fence(backup->ttm.base.resv, fence,
>> +				   DMA_RESV_USAGE_KERNEL);
>> +		dma_fence_put(fence);
>> +	} else {
>> +		ret = xe_bo_vmap(backup);
>> +		if (ret)
>> +			goto out_backup;
>> +
>> +		if (iosys_map_is_null(&bo->vmap)) {
>> +			ret = xe_bo_vmap(bo);
>> +			if (ret)
>> +				goto out_backup;
>> +			unmap = true;
>> +		}
>> +
>> +		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
>> +				   bo->size);
>> +	}
>> +
>> +	bo->backup_obj = backup;
>> +
>> +out_backup:
>> +	xe_bo_vunmap(backup);
>> +	xe_bo_unlock(backup);
>>   	if (ret)
>> -		goto err_res_free;
>> -
>> -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
>> -	if (ret)
>> -		goto err_res_free;
>> -
>> -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
>> -	if (ret)
>> -		goto err_res_free;
>> -
>> -	return 0;
>> -
>> -err_res_free:
>> -	ttm_resource_free(&bo->ttm, &new_mem);
>> +		xe_bo_put(backup);
>> +out_unlock_bo:
>> +	if (unmap)
>> +		xe_bo_vunmap(bo);
>> +	xe_bo_unlock(bo);
>>   	return ret;
>>   }
>>   
>> @@ -1180,47 +1176,82 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
>>   		.interruptible = false,
>>   		.gfp_retry_mayfail = false,
>>   	};
>> -	struct ttm_resource *new_mem;
>> -	struct ttm_place *place = &bo->placements[0];
>> +	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
>> +	struct xe_bo *backup = bo->backup_obj;
>> +	bool unmap = false;
>>   	int ret;
>>   
>> -	xe_bo_assert_held(bo);
>> -
>> -	if (WARN_ON(!bo->ttm.resource))
>> -		return -EINVAL;
>> -
>> -	if (WARN_ON(!xe_bo_is_pinned(bo)))
>> -		return -EINVAL;
>> -
>> -	if (WARN_ON(xe_bo_is_vram(bo)))
>> -		return -EINVAL;
>> -
>> -	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
>> -		return -EINVAL;
>> -
>> -	if (!mem_type_is_vram(place->mem_type))
>> +	if (!backup)
>>   		return 0;
>>   
>> -	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
>> +	xe_bo_lock(backup, false);
>> +
>> +	ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
>>   	if (ret)
>> -		return ret;
>> +		goto out_backup;
>>   
>> -	ret = ttm_bo_populate(&bo->ttm, &ctx);
>> -	if (ret)
>> -		goto err_res_free;
>> +	if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
>> +		ret = -EBUSY;
>> +		goto out_backup;
>> +	}
>>   
>> -	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
>> -	if (ret)
>> -		goto err_res_free;
>> +	if (xe_bo_is_user(bo)) {
>> +		struct xe_migrate *migrate;
>> +		struct dma_fence *fence;
>>   
>> -	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
>> -	if (ret)
>> -		goto err_res_free;
>> +		if (bo->tile)
>> +			migrate = bo->tile->migrate;
>> +		else
>> +			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
>>   
>> -	return 0;
>> +		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
>> +		if (ret)
>> +			goto out_unlock_bo;
>>   
>> -err_res_free:
>> -	ttm_resource_free(&bo->ttm, &new_mem);
>> +		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
>> +		if (ret)
>> +			goto out_unlock_bo;
>> +
>> +		fence = xe_migrate_copy(migrate, backup, bo,
>> +					backup->ttm.resource, bo->ttm.resource,
>> +					false);
>> +		if (IS_ERR(fence)) {
>> +			ret = PTR_ERR(fence);
>> +			goto out_unlock_bo;
>> +		}
>> +
>> +		dma_resv_add_fence(bo->ttm.base.resv, fence,
>> +				   DMA_RESV_USAGE_KERNEL);
>> +		dma_resv_add_fence(backup->ttm.base.resv, fence,
>> +				   DMA_RESV_USAGE_KERNEL);
>> +		dma_fence_put(fence);
>> +	} else {
>> +		ret = xe_bo_vmap(backup);
>> +		if (ret)
>> +			goto out_unlock_bo;
>> +
>> +		if (iosys_map_is_null(&bo->vmap)) {
>> +			ret = xe_bo_vmap(bo);
>> +			if (ret)
>> +				goto out_unlock_bo;
>> +			unmap = true;
>> +		}
>> +
>> +		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
>> +				 bo->size);
>> +	}
>> +
>> +	bo->backup_obj = NULL;
>> +
>> +out_unlock_bo:
>> +	if (unmap)
>> +		xe_bo_vunmap(bo);
>> +	xe_bo_unlock(bo);
>> +out_backup:
>> +	xe_bo_vunmap(backup);
>> +	xe_bo_unlock(backup);
>> +	if (!bo->backup_obj)
>> +		xe_bo_put(backup);
>>   	return ret;
>>   }
>>   
>> @@ -2149,22 +2180,6 @@ int xe_bo_pin(struct xe_bo *bo)
>>   	if (err)
>>   		return err;
>>   
>> -	/*
>> -	 * For pinned objects in on DGFX, which are also in vram, we expect
>> -	 * these to be in contiguous VRAM memory. Required eviction / restore
>> -	 * during suspend / resume (force restore to same physical address).
>> -	 */
>> -	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
>> -	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
>> -		if (mem_type_is_vram(place->mem_type)) {
>> -			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
>> -
>> -			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
>> -				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
>> -			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
>> -		}
>> -	}
>> -
>>   	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
>>   		spin_lock(&xe->pinned.lock);
>>   		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
>> diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
>> index 1eeb3910450b..6e6a5d7a5617 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_evict.c
>> +++ b/drivers/gpu/drm/xe/xe_bo_evict.c
>> @@ -31,14 +31,12 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
>>   		list_move_tail(&bo->pinned_link, &still_in_list);
>>   		spin_unlock(&xe->pinned.lock);
>>   
>> -		xe_bo_lock(bo, false);
>>   		ret = pinned_fn(bo);
>>   		if (ret && pinned_list != new_list) {
>>   			spin_lock(&xe->pinned.lock);
>>   			list_move(&bo->pinned_link, pinned_list);
>>   			spin_unlock(&xe->pinned.lock);
>>   		}
>> -		xe_bo_unlock(bo);
>>   		xe_bo_put(bo);
>>   		spin_lock(&xe->pinned.lock);
>>   	}
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 15a92e3d4898..81396181aaea 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -28,6 +28,8 @@ struct xe_vm;
>>   struct xe_bo {
>>   	/** @ttm: TTM base buffer object */
>>   	struct ttm_buffer_object ttm;
>> +	/** @backup_obj: The backup object when pinned and suspended (vram only) */
>> +	struct xe_bo *backup_obj;
>>   	/** @size: Size of this buffer object */
>>   	size_t size;
>>   	/** @flags: flags for this buffer object */
> 


