[RFC PATCH] drm/xe/lnl: Implement clear-on-free for pooled BOs

Matthew Brost matthew.brost at intel.com
Fri Aug 23 06:27:53 UTC 2024


On Thu, Aug 22, 2024 at 02:42:44PM +0200, Nirmoy Das wrote:
> Implement GPU clear-on-free for pooled system pages in Xe.
> 
> Ensure proper use of TTM_TT_FLAG_CLEARED_ON_FREE by leveraging
> ttm_device_funcs.release_notify() for GPU clear-on-free. If the GPU clear
> fails, xe_ttm_tt_unpopulate() will fall back to a CPU clear.
> 
> Clear-on-free is only relevant for pooled pages, as the driver has to give
> those pages back to the pool. So do clear-on-free only for such BOs, and
> keep doing clear-on-alloc for ttm_cached BOs.
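
Just to check I'm reading the policy right -- the split in xe_ttm_tt_create()
below condenses to roughly this (untested, same logic, just folded into a
"pooled" bool):

        if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) {
                /* per the commit message, only non-cached pages go back to a pool */
                bool pooled = (caching != ttm_cached);

                /* non-pooled: GPU clear on the first move, nothing to do on free */
                tt->sys_clear_on_alloc = !pooled;
                /* pooled: CPU clear on alloc for now, GPU clear from release_notify() */
                tt->sys_clear_on_free = pooled;
        }
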
> 
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

I haven't been following the TTM changes, so I don't feel comfortable
giving an RB, but the job / dma-fence handling looks correct to me, and so
does the placement of the clear job. One nit below.
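
The fence handling in release_notify() is essentially "attach the clear
fence to the BO's resv, or wait for it inline if we can't", i.e. something
like this hypothetical helper (names invented here, reference handling
elided, untested):

        static void clear_fence_attach_or_wait(struct dma_resv *resv,
                                               struct dma_fence *fence)
        {
                /* No fence means no GPU clear was queued, nothing to do */
                if (!fence)
                        return;

                /* Prefer async: the delayed-destroy path waits on the resv */
                if (!dma_resv_reserve_fences(resv, 1))
                        dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_KERNEL);
                else
                        dma_fence_wait(fence, false);
        }

which matches what the hunk below does open-coded.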
 
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c | 101 +++++++++++++++++++++++++++++++++----
>  1 file changed, 91 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 6ed0e1955215..e7bc74f8ae82 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -283,6 +283,8 @@ struct xe_ttm_tt {
>  	struct device *dev;
>  	struct sg_table sgt;
>  	struct sg_table *sg;
> +	bool sys_clear_on_free;
> +	bool sys_clear_on_alloc;

Nit:

bool sys_clear_on_free :1;
bool sys_clear_on_alloc :1;
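
i.e. with the surrounding members from the hunk below, something like
(comment wording mine, untested):

        struct xe_ttm_tt {
                struct ttm_tt ttm;
                struct device *dev;
                struct sg_table sgt;
                struct sg_table *sg;
                /* pooled pages: GPU clear queued from release_notify() */
                bool sys_clear_on_free:1;
                /* non-pooled pages: GPU clear on the first move */
                bool sys_clear_on_alloc:1;
        };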

Matt

>  };
>  
>  static int xe_tt_map_sg(struct ttm_tt *tt)
> @@ -401,8 +403,23 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
>  	 * flag. Zeroed pages are only required for ttm_bo_type_device so
>  	 * unwanted data is not leaked to userspace.
>  	 */
> -	if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys)
> -		page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE;
> +	if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) {
> +		/*
> +		 * Non-pooled BOs are always clear on alloc when possible.
> +		 * clear-on-free is not needed as there is no pool to give pages back.
> +		 */
> +		if (caching == ttm_cached) {
> +			tt->sys_clear_on_alloc = true;
> +			tt->sys_clear_on_free = false;
> +		} else {
> +		/*
> +		 * For pooled BO, clear-on-alloc is done by the CPU for now and
> +		 * GPU will do clear on free when releasing the BO.
> +		 */
> +			tt->sys_clear_on_alloc = false;
> +			tt->sys_clear_on_free = true;
> +		}
> +	}
>  
>  	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
>  	if (err) {
> @@ -416,8 +433,10 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
>  static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
>  			      struct ttm_operation_ctx *ctx)
>  {
> +	struct xe_ttm_tt *xe_tt;
>  	int err;
>  
> +	xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
>  	/*
>  	 * dma-bufs are not populated with pages, and the dma-
>  	 * addresses are set up when moved to XE_PL_TT.
> @@ -426,7 +445,7 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
>  		return 0;
>  
>  	/* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */
> -	if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE)
> +	if (xe_tt->sys_clear_on_alloc)
>  		tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC;
>  
>  	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
> @@ -438,11 +457,19 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
>  
>  static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
>  {
> +	struct xe_ttm_tt *xe_tt;
> +
> +	xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
> +
>  	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
>  		return;
>  
>  	xe_tt_unmap_sg(tt);
>  
> +	/* Hint TTM pool that pages are already cleared */
> +	if (xe_tt->sys_clear_on_free)
> +		tt->page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE;
> +
>  	return ttm_pool_free(&ttm_dev->pool, tt);
>  }
>  
> @@ -664,6 +691,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>  	struct ttm_resource *old_mem = ttm_bo->resource;
>  	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
>  	struct ttm_tt *ttm = ttm_bo->ttm;
> +	struct xe_ttm_tt *xe_tt;
>  	struct xe_migrate *migrate = NULL;
>  	struct dma_fence *fence;
>  	bool move_lacks_source;
> @@ -674,12 +702,13 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>  	bool clear_system_pages;
>  	int ret = 0;
>  
> +	xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
>  	/*
>  	 * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when
>  	 * moving to system as the bo doesn't have dma_mapping.
>  	 */
>  	if (!old_mem && ttm && !ttm_tt_is_populated(ttm))
> -		ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE;
> +		xe_tt->sys_clear_on_alloc = false;
>  
>  	/* Bo creation path, moving to system or TT. */
>  	if ((!old_mem && ttm) && !handle_system_ccs) {
> @@ -703,10 +732,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>  	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
>  						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
>  
> -	clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE);
> +	clear_system_pages = ttm && xe_tt->sys_clear_on_alloc;
>  	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
> -		(!ttm && ttm_bo->type == ttm_bo_type_device) ||
> -		clear_system_pages;
> +		(!ttm && ttm_bo->type == ttm_bo_type_device) || clear_system_pages;
>  
>  	if (new_mem->mem_type == XE_PL_TT) {
>  		ret = xe_tt_map_sg(ttm);
> @@ -1028,10 +1056,47 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
>  	return locked;
>  }
>  
> +static struct dma_fence *xe_ttm_bo_clear_on_free(struct ttm_buffer_object *ttm_bo)
> +{
> +	struct xe_bo *bo  = ttm_to_xe_bo(ttm_bo);
> +	struct xe_device *xe = xe_bo_device(bo);
> +	struct xe_migrate *migrate;
> +	struct xe_ttm_tt *xe_tt;
> +	struct dma_fence *clear_fence;
> +
> +	/* return early if nothing to clear */
> +	if (!ttm_bo->ttm)
> +		return NULL;
> +
> +	xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
> +	/* return early if nothing to clear */
> +	if (!xe_tt->sys_clear_on_free || !bo->ttm.resource)
> +		return NULL;
> +
> +	if (XE_WARN_ON(!xe_tt->sg))
> +		return NULL;
> +
> +	if (bo->tile)
> +		migrate = bo->tile->migrate;
> +	else
> +		migrate = xe->tiles[0].migrate;
> +
> +	xe_assert(xe, migrate);
> +
> +	clear_fence = xe_migrate_clear(migrate, bo, bo->ttm.resource,
> +				       XE_MIGRATE_CLEAR_FLAG_FULL);
> +	if (IS_ERR(clear_fence))
> +		return NULL;
> +
> +	xe_tt->sys_clear_on_free = false;
> +
> +	return clear_fence;
> +}
> +
>  static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
>  {
>  	struct dma_resv_iter cursor;
> -	struct dma_fence *fence;
> +	struct dma_fence *clear_fence, *fence;
>  	struct dma_fence *replacement = NULL;
>  	struct xe_bo *bo;
>  
> @@ -1041,15 +1106,31 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
>  	bo = ttm_to_xe_bo(ttm_bo);
>  	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
>  
> +	clear_fence = xe_ttm_bo_clear_on_free(ttm_bo);
> +
>  	/*
>  	 * Corner case where TTM fails to allocate memory and this BOs resv
>  	 * still points the VMs resv
>  	 */
> -	if (ttm_bo->base.resv != &ttm_bo->base._resv)
> +	if (ttm_bo->base.resv != &ttm_bo->base._resv) {
> +		if (clear_fence)
> +			dma_fence_wait(clear_fence, false);
>  		return;
> +	}
>  
> -	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
> +	if (!xe_ttm_bo_lock_in_destructor(ttm_bo)) {
> +		if (clear_fence)
> +			dma_fence_wait(clear_fence, false);
>  		return;
> +	}
> +
> +	if (clear_fence) {
> +		if (dma_resv_reserve_fences(ttm_bo->base.resv, 1))
> +			dma_fence_wait(clear_fence, false);
> +		else
> +			dma_resv_add_fence(ttm_bo->base.resv, clear_fence,
> +					   DMA_RESV_USAGE_KERNEL);
> +	}
>  
>  	/*
>  	 * Scrub the preempt fences if any. The unbind fence is already
> -- 
> 2.42.0
> 