[PATCH] drm/xe: Implement clear VRAM on free
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Jun 12 12:53:16 UTC 2025
On Tue, 2025-06-10 at 22:42 -0700, Matthew Brost wrote:
> Clearing on free should hide latency of BO clears on new user BO
> allocations.
>
> Implemented via calling xe_migrate_clear in release notify and
> updating
> iterator in xe_migrate_clear to skip cleared buddy blocks. Only user
> BOs
> cleared in release notify as kernel BOs could still be in use (e.g.,
> PT
> BOs need to wait for dma-resv to be idle).
Wouldn't it be fully possible for a user to do (deep pipelining 3d
case)
create_bo();
map_write_unmap_bo();
bind_bo();
submit_job_touching_bo();
unbind_bo();
free_bo();
Where the free_bo and release_notify() is called long before the job we
submitted even started?
So that would mean the clear needs to await any previous fences, and
that dependency addition seems to have been removed from
xe_migrate_clear.
Another side-effect I think this will have is that bos that are deleted
are not subject to asynchronous eviction. I think if this bo is hit
during lru walk and clearing, TTM will just sync wait for it to become
idle and then free the memory. I think the reason that could not be
fixed in TTM is that TTM needs all resource-manager fences to be
ordered; but if we added a check for ordered fences — which I think
here requires that the eviction exec_queue is the same as the clearing
one — this could be fixed in TTM.
Otherwise, this could also cause newly introduced sync waits in the
exec() and vm_bind paths where we previously performed eviction and the
subsequent clearing async.
Some additional stuff below:
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 47
> ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_migrate.c | 14 ++++++---
> drivers/gpu/drm/xe/xe_migrate.h | 1 +
> drivers/gpu/drm/xe/xe_res_cursor.h | 26 +++++++++++++++
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 5 ++-
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 6 ++++
> 6 files changed, 94 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 4e39188a021a..74470f4d418d 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1434,6 +1434,51 @@ static bool
> xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
> return locked;
> }
>
> +static void xe_ttm_bo_release_clear(struct ttm_buffer_object
> *ttm_bo)
> +{
> + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> + struct dma_fence *fence;
> + int err, idx;
> +
> + xe_bo_assert_held(ttm_to_xe_bo(ttm_bo));
> +
> + if (ttm_bo->type != ttm_bo_type_device)
> + return;
> +
> + if (xe_device_wedged(xe))
> + return;
> +
> + if (!ttm_bo->resource || !mem_type_is_vram(ttm_bo->resource-
> >mem_type))
> + return;
> +
> + if (!drm_dev_enter(&xe->drm, &idx))
> + return;
> +
> + if (!xe_pm_runtime_get_if_active(xe))
> + goto unbind;
> +
> + err = dma_resv_reserve_fences(&ttm_bo->base._resv, 1);
> + if (err)
> + goto put_pm;
> +
> + fence = xe_migrate_clear(mem_type_to_migrate(xe, ttm_bo-
> >resource->mem_type),
> + ttm_to_xe_bo(ttm_bo), ttm_bo-
> >resource,
We should be very careful with passing the xe_bo part here because the
gem refcount is currently zero. So that any caller deeper down in the
call chain might try to do an xe_bo_get() and blow up.
Ideally we'd make xe_migrate_clear() operate only on the ttm_bo for
this to be safe.
/Thomas
> + XE_MIGRATE_CLEAR_FLAG_FULL |
> + XE_MIGRATE_CLEAR_NON_DIRTY);
> + if (XE_WARN_ON(IS_ERR(fence)))
> + goto put_pm;
> +
> + xe_ttm_vram_mgr_resource_set_cleared(ttm_bo->resource);
> + dma_resv_add_fence(&ttm_bo->base._resv, fence,
> + DMA_RESV_USAGE_KERNEL);
> + dma_fence_put(fence);
> +
> +put_pm:
> + xe_pm_runtime_put(xe);
> +unbind:
> + drm_dev_exit(idx);
> +}
> +
> static void xe_ttm_bo_release_notify(struct ttm_buffer_object
> *ttm_bo)
> {
> struct dma_resv_iter cursor;
> @@ -1478,6 +1523,8 @@ static void xe_ttm_bo_release_notify(struct
> ttm_buffer_object *ttm_bo)
> }
> dma_fence_put(replacement);
>
> + xe_ttm_bo_release_clear(ttm_bo);
> +
> dma_resv_unlock(ttm_bo->base.resv);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
> b/drivers/gpu/drm/xe/xe_migrate.c
> index 8f8e9fdfb2a8..39d7200cb366 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -1063,7 +1063,7 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
> struct xe_gt *gt = m->tile->primary_gt;
> struct xe_device *xe = gt_to_xe(gt);
> bool clear_only_system_ccs = false;
> - struct dma_fence *fence = NULL;
> + struct dma_fence *fence = dma_fence_get_stub();
> u64 size = bo->size;
> struct xe_res_cursor src_it;
> struct ttm_resource *src = dst;
> @@ -1075,10 +1075,13 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
> if (!clear_bo_data && clear_ccs && !IS_DGFX(xe))
> clear_only_system_ccs = true;
>
> - if (!clear_vram)
> + if (!clear_vram) {
> xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
> - else
> + } else {
> xe_res_first(src, 0, bo->size, &src_it);
> + if (!(clear_flags & XE_MIGRATE_CLEAR_NON_DIRTY))
> + size -= xe_res_next_dirty(&src_it);
> + }
>
> while (size) {
> u64 clear_L0_ofs;
> @@ -1125,6 +1128,9 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
> emit_pte(m, bb, clear_L0_pt, clear_vram,
> clear_only_system_ccs,
> &src_it, clear_L0, dst);
>
> + if (clear_vram && !(clear_flags &
> XE_MIGRATE_CLEAR_NON_DIRTY))
> + size -= xe_res_next_dirty(&src_it);
> +
> bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> update_idx = bb->len;
>
> @@ -1146,7 +1152,7 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
> }
>
> xe_sched_job_add_migrate_flush(job, flush_flags);
> - if (!fence) {
> + if (fence == dma_fence_get_stub()) {
> /*
> * There can't be anything userspace related
> at this
> * point, so we just need to respect any
> potential move
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h
> b/drivers/gpu/drm/xe/xe_migrate.h
> index fb9839c1bae0..58a7b747ef11 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -118,6 +118,7 @@ int xe_migrate_access_memory(struct xe_migrate
> *m, struct xe_bo *bo,
>
> #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0)
> #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1)
> +#define XE_MIGRATE_CLEAR_NON_DIRTY BIT(2)
> #define
> XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
> XE_MIGRATE_CLEAR_FLAG_CCS_DA
> TA)
> struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h
> b/drivers/gpu/drm/xe/xe_res_cursor.h
> index d1a403cfb628..630082e809ba 100644
> --- a/drivers/gpu/drm/xe/xe_res_cursor.h
> +++ b/drivers/gpu/drm/xe/xe_res_cursor.h
> @@ -315,6 +315,32 @@ static inline void xe_res_next(struct
> xe_res_cursor *cur, u64 size)
> }
> }
>
> +/**
> + * xe_res_next_dirty - advance the cursor to next dirty buddy block
> + *
> + * @cur: the cursor to advance
> + *
> + * Move the cursor until dirty buddy block is found.
> + *
> + * Return: Number of bytes cursor has been advanced
> + */
> +static inline u64 xe_res_next_dirty(struct xe_res_cursor *cur)
> +{
> + struct drm_buddy_block *block = cur->node;
> + u64 bytes = 0;
> +
> + XE_WARN_ON(cur->mem_type != XE_PL_VRAM0 &&
> + cur->mem_type != XE_PL_VRAM1);
> +
> + while (cur->remaining && drm_buddy_block_is_clear(block)) {
> + bytes += cur->size;
> + xe_res_next(cur, cur->size);
> + block = cur->node;
> + }
> +
> + return bytes;
> +}
> +
> /**
> * xe_res_dma - return dma address of cursor at current position
> *
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 9e375a40aee9..120046941c1e 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -84,6 +84,9 @@ static int xe_ttm_vram_mgr_new(struct
> ttm_resource_manager *man,
> if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
> + if (tbo->type == ttm_bo_type_device)
> + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +
> if (WARN_ON(!vres->base.size)) {
> err = -EINVAL;
> goto error_fini;
> @@ -187,7 +190,7 @@ static void xe_ttm_vram_mgr_del(struct
> ttm_resource_manager *man,
> struct drm_buddy *mm = &mgr->mm;
>
> mutex_lock(&mgr->lock);
> - drm_buddy_free_list(mm, &vres->blocks, 0);
> + drm_buddy_free_list(mm, &vres->blocks, vres->flags);
> mgr->visible_avail += vres->used_visible_size;
> mutex_unlock(&mgr->lock);
>
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> index cc76050e376d..dfc0e6890b3c 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> @@ -36,6 +36,12 @@ to_xe_ttm_vram_mgr_resource(struct ttm_resource
> *res)
> return container_of(res, struct xe_ttm_vram_mgr_resource,
> base);
> }
>
> +static inline void
> +xe_ttm_vram_mgr_resource_set_cleared(struct ttm_resource *res)
> +{
> + to_xe_ttm_vram_mgr_resource(res)->flags |=
> DRM_BUDDY_CLEARED;
> +}
> +
> static inline struct xe_ttm_vram_mgr *
> to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
> {
More information about the Intel-xe
mailing list