[PATCH v3 4/4] drm/xe: Implement clear VRAM on free
Matthew Auld
matthew.auld at intel.com
Fri Jul 4 09:17:24 UTC 2025
On 23/06/2025 16:19, Matthew Brost wrote:
> Clearing on free should hide latency of BO clears on new user BO
> allocations.
>
> Implemented by calling xe_migrate_clear in release notify and updating the
> iterator in xe_migrate_clear to skip cleared buddy blocks. Only user BOs are
> cleared in release notify, as kernel BOs could still be in use (e.g., PT
> BOs need to wait for dma-resv to be idle).
>
> v2:
> - Update to new xe_migrate_clear arguments (Thomas)
> - Wait on BOOKKEEP slots for clear on free (Thomas)
> - s/XE_MIGRATE_CLEAR_NON_DIRTY/XE_MIGRATE_CLEAR_FLAG_ON_FREE
> - Do not request cleared memory in VRAM manager if BO has TT data (Auld)
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 48 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_migrate.c | 31 ++++++++++++------
> drivers/gpu/drm/xe/xe_migrate.h | 1 +
> drivers/gpu/drm/xe/xe_res_cursor.h | 26 +++++++++++++++
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 6 +++-
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 6 ++++
> 6 files changed, 108 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 7a412121477e..900f67deef9d 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1453,6 +1453,52 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
> return locked;
> }
>
> +static void xe_ttm_bo_release_clear(struct ttm_buffer_object *ttm_bo)
> +{
> + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
> + struct dma_fence *fence;
> + int err, idx;
> +
> + xe_bo_assert_held(ttm_to_xe_bo(ttm_bo));
> +
> + if (ttm_bo->type != ttm_bo_type_device)
> + return;
> +
> + if (xe_device_wedged(xe))
> + return;
> +
> + if (!ttm_bo->resource || !mem_type_is_vram(ttm_bo->resource->mem_type))
> + return;
> +
> + if (!drm_dev_enter(&xe->drm, &idx))
> + return;
> +
> + if (!xe_pm_runtime_get_if_active(xe))
> + goto unbind;
> +
> + err = dma_resv_reserve_fences(&ttm_bo->base._resv, 1);
> + if (err)
> + goto put_pm;
> +
> + fence = xe_migrate_clear(mem_type_to_migrate(xe, ttm_bo->resource->mem_type),
> + ttm_bo->resource, &ttm_bo->base._resv, NULL,
> + bo->size, XE_MIGRATE_CLEAR_FLAG_FULL |
> + XE_MIGRATE_CLEAR_FLAG_ON_FREE, &bo->ccs_cleared);
> + if (XE_WARN_ON(IS_ERR(fence)))
Should we raise a warning for this? I think there might be some interruptible
waits in there which could legitimately trigger it.
> + goto put_pm;
> +
> + xe_ttm_vram_mgr_resource_set_cleared(ttm_bo->resource);
> + dma_resv_add_fence(&ttm_bo->base._resv, fence,
> + DMA_RESV_USAGE_KERNEL);
> + dma_fence_put(fence);
> +
> +put_pm:
> + xe_pm_runtime_put(xe);
> +unbind:
> + drm_dev_exit(idx);
> +}
> +
> static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
> {
> struct dma_resv_iter cursor;
> @@ -1497,6 +1543,8 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
> }
> dma_fence_put(replacement);
>
> + xe_ttm_bo_release_clear(ttm_bo);
> +
> dma_resv_unlock(ttm_bo->base.resv);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 18030f9613ae..7620f4eb8fb3 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -1067,7 +1067,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> struct xe_gt *gt = m->tile->primary_gt;
> struct xe_device *xe = gt_to_xe(gt);
> bool clear_only_system_ccs = false;
> - struct dma_fence *fence = NULL;
> + struct dma_fence *fence = dma_fence_get_stub();
> struct xe_res_cursor src_it;
> struct ttm_resource *src = dst;
> int err;
> @@ -1078,10 +1078,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> if (!clear_bo_data && clear_ccs && !IS_DGFX(xe))
> clear_only_system_ccs = true;
>
> - if (!clear_vram)
> + if (!clear_vram) {
> xe_res_first_sg(sgt, 0, size, &src_it);
> - else
> + } else {
> xe_res_first(src, 0, size, &src_it);
> + if (!(clear_flags & XE_MIGRATE_CLEAR_FLAG_ON_FREE))
> + size -= xe_res_next_dirty(&src_it);
> + }
>
> while (size) {
> u64 clear_L0_ofs;
> @@ -1128,6 +1131,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
> &src_it, clear_L0, dst);
>
> + if (clear_vram && !(clear_flags & XE_MIGRATE_CLEAR_FLAG_ON_FREE))
> + size -= xe_res_next_dirty(&src_it);
> +
> bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> update_idx = bb->len;
>
> @@ -1149,15 +1155,22 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> }
>
> xe_sched_job_add_migrate_flush(job, flush_flags);
> - if (!fence) {
> + if (fence == dma_fence_get_stub()) {
Does this get() always have a matching put()? For example, when fence != stub?
> + enum dma_resv_usage usage =
> + (clear_flags & XE_MIGRATE_CLEAR_FLAG_ON_FREE) ?
> + DMA_RESV_USAGE_BOOKKEEP : DMA_RESV_USAGE_KERNEL;
> +
> /*
> - * There can't be anything userspace related at this
> - * point, so we just need to respect any potential move
> - * fences, which are always tracked as
> + * For initial clears, there can't be anything userspace
> + * related at this point, so we just need to respect any
> + * potential move fences, which are always tracked as
> * DMA_RESV_USAGE_KERNEL.
> + *
> + * For clear on free need to respect all fences as
> + * memory could still be in use by the GPU which is
> + * tracked in DMA_RESV_USAGE_BOOKKEEP.
> */
> - err = xe_sched_job_add_deps(job, resv,
> - DMA_RESV_USAGE_KERNEL);
> + err = xe_sched_job_add_deps(job, resv, usage);
> if (err)
> goto err_job;
> }
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index 04b3cd86ba90..104c61812ba1 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -120,6 +120,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
>
> #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0)
> #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1)
> +#define XE_MIGRATE_CLEAR_FLAG_ON_FREE BIT(2)
> #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
> XE_MIGRATE_CLEAR_FLAG_CCS_DATA)
> struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
> index d1a403cfb628..630082e809ba 100644
> --- a/drivers/gpu/drm/xe/xe_res_cursor.h
> +++ b/drivers/gpu/drm/xe/xe_res_cursor.h
> @@ -315,6 +315,32 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
> }
> }
>
> +/**
> + * xe_res_next_dirty - advance the cursor to next dirty buddy block
> + *
> + * @cur: the cursor to advance
> + *
> + * Move the cursor until dirty buddy block is found.
> + *
> + * Return: Number of bytes cursor has been advanced
> + */
> +static inline u64 xe_res_next_dirty(struct xe_res_cursor *cur)
> +{
> + struct drm_buddy_block *block = cur->node;
> + u64 bytes = 0;
> +
> + XE_WARN_ON(cur->mem_type != XE_PL_VRAM0 &&
> + cur->mem_type != XE_PL_VRAM1);
> +
> + while (cur->remaining && drm_buddy_block_is_clear(block)) {
> + bytes += cur->size;
> + xe_res_next(cur, cur->size);
> + block = cur->node;
> + }
> +
> + return bytes;
> +}
> +
> /**
> * xe_res_dma - return dma address of cursor at current position
> *
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 9e375a40aee9..838739e47815 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -84,6 +84,10 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
> if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
> vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
> + if (tbo->type == ttm_bo_type_device &&
> + !xe_bo_tt_has_data(ttm_to_xe_bo(tbo)))
> + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +
> if (WARN_ON(!vres->base.size)) {
> err = -EINVAL;
> goto error_fini;
> @@ -187,7 +191,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
> struct drm_buddy *mm = &mgr->mm;
>
> mutex_lock(&mgr->lock);
> - drm_buddy_free_list(mm, &vres->blocks, 0);
> + drm_buddy_free_list(mm, &vres->blocks, vres->flags);
> mgr->visible_avail += vres->used_visible_size;
> mutex_unlock(&mgr->lock);
>
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> index cc76050e376d..dfc0e6890b3c 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> @@ -36,6 +36,12 @@ to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
> return container_of(res, struct xe_ttm_vram_mgr_resource, base);
> }
>
> +static inline void
> +xe_ttm_vram_mgr_resource_set_cleared(struct ttm_resource *res)
> +{
> + to_xe_ttm_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED;
> +}
> +
> static inline struct xe_ttm_vram_mgr *
> to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
> {
More information about the Intel-xe
mailing list