[PATCH v2 3/3] drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu Jun 26 13:00:07 UTC 2025


On Thu, 2025-06-26 at 13:06 +0200, Christian König wrote:
> On 23.06.25 17:53, Thomas Hellström wrote:
> > To avoid duplicating the tricky bo locking implementation,
> > Implement ttm_lru_walk_for_evict() using the guarded bo LRU
> > iteration.
> > 
> > To facilitate this, support ticketlocking from the guarded bo LRU
> > iteration.
> > 
> > v2:
> > - Clean up some static function interfaces (Christian König)
> > - Fix Handling -EALREADY from ticketlocking in the loop by
> >   skipping to the next item. (Intel CI)
> > 
> > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> 
> I have a cold at the moment so I might have missed something, but
> still feel free to add Reviewed-by: Christian König
> <christian.koenig at amd.com>.

Thanks, I'll likely be merging this through drm-misc-next.
/Thomas


> 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo_util.c | 188 ++++++++++++--------------
> > ----
> >  drivers/gpu/drm/xe/xe_shrinker.c  |   7 +-
> >  include/drm/ttm/ttm_bo.h          |   9 +-
> >  3 files changed, 88 insertions(+), 116 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index 6de1f0c432c2..250675d56b1c 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -773,16 +773,15 @@ int ttm_bo_pipeline_gutting(struct
> > ttm_buffer_object *bo)
> >  	return ret;
> >  }
> >  
> > -static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
> > -				 struct ttm_buffer_object *bo,
> > -				 bool *needs_unlock)
> > +static bool ttm_lru_walk_trylock(struct ttm_bo_lru_cursor *curs,
> > +				 struct ttm_buffer_object *bo)
> >  {
> > -	struct ttm_operation_ctx *ctx = arg->ctx;
> > +	struct ttm_operation_ctx *ctx = curs->arg->ctx;
> >  
> > -	*needs_unlock = false;
> > +	curs->needs_unlock = false;
> >  
> >  	if (dma_resv_trylock(bo->base.resv)) {
> > -		*needs_unlock = true;
> > +		curs->needs_unlock = true;
> >  		return true;
> >  	}
> >  
> > @@ -794,10 +793,10 @@ static bool ttm_lru_walk_trylock(struct
> > ttm_lru_walk_arg *arg,
> >  	return false;
> >  }
> >  
> > -static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
> > -				   struct ttm_buffer_object *bo,
> > -				   bool *needs_unlock)
> > +static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs,
> > +				   struct ttm_buffer_object *bo)
> >  {
> > +	struct ttm_lru_walk_arg *arg = curs->arg;
> >  	struct dma_resv *resv = bo->base.resv;
> >  	int ret;
> >  
> > @@ -807,7 +806,7 @@ static int ttm_lru_walk_ticketlock(struct
> > ttm_lru_walk_arg *arg,
> >  		ret = dma_resv_lock(resv, arg->ticket);
> >  
> >  	if (!ret) {
> > -		*needs_unlock = true;
> > +		curs->needs_unlock = true;
> >  		/*
> >  		 * Only a single ticketlock per loop. Ticketlocks
> > are prone
> >  		 * to return -EDEADLK causing the eviction to
> > fail, so
> > @@ -823,12 +822,6 @@ static int ttm_lru_walk_ticketlock(struct
> > ttm_lru_walk_arg *arg,
> >  	return ret;
> >  }
> >  
> > -static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool
> > locked)
> > -{
> > -	if (locked)
> > -		dma_resv_unlock(bo->base.resv);
> > -}
> > -
> >  /**
> >   * ttm_lru_walk_for_evict() - Perform a LRU list walk, with
> > actions taken on
> >   * valid items.
> > @@ -863,64 +856,21 @@ static void ttm_lru_walk_unlock(struct
> > ttm_buffer_object *bo, bool locked)
> >  s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct
> > ttm_device *bdev,
> >  			   struct ttm_resource_manager *man, s64
> > target)
> >  {
> > -	struct ttm_resource_cursor cursor;
> > -	struct ttm_resource *res;
> > +	struct ttm_bo_lru_cursor cursor;
> > +	struct ttm_buffer_object *bo;
> >  	s64 progress = 0;
> >  	s64 lret;
> >  
> > -	spin_lock(&bdev->lru_lock);
> > -	ttm_resource_cursor_init(&cursor, man);
> > -	ttm_resource_manager_for_each_res(&cursor, res) {
> > -		struct ttm_buffer_object *bo = res->bo;
> > -		bool bo_needs_unlock = false;
> > -		bool bo_locked = false;
> > -		int mem_type;
> > -
> > -		/*
> > -		 * Attempt a trylock before taking a reference on
> > the bo,
> > -		 * since if we do it the other way around, and the
> > trylock fails,
> > -		 * we need to drop the lru lock to put the bo.
> > -		 */
> > -		if (ttm_lru_walk_trylock(&walk->arg, bo,
> > &bo_needs_unlock))
> > -			bo_locked = true;
> > -		else if (!walk->arg.ticket || walk->arg.ctx-
> > >no_wait_gpu ||
> > -			 walk->arg.trylock_only)
> > -			continue;
> > -
> > -		if (!ttm_bo_get_unless_zero(bo)) {
> > -			ttm_lru_walk_unlock(bo, bo_needs_unlock);
> > -			continue;
> > -		}
> > -
> > -		mem_type = res->mem_type;
> > -		spin_unlock(&bdev->lru_lock);
> > -
> > -		lret = 0;
> > -		if (!bo_locked)
> > -			lret = ttm_lru_walk_ticketlock(&walk->arg,
> > bo, &bo_needs_unlock);
> > -
> > -		/*
> > -		 * Note that in between the release of the lru
> > lock and the
> > -		 * ticketlock, the bo may have switched resource,
> > -		 * and also memory type, since the resource may
> > have been
> > -		 * freed and allocated again with a different
> > memory type.
> > -		 * In that case, just skip it.
> > -		 */
> > -		if (!lret && bo->resource && bo->resource-
> > >mem_type == mem_type)
> > -			lret = walk->ops->process_bo(walk, bo);
> > -
> > -		ttm_lru_walk_unlock(bo, bo_needs_unlock);
> > -		ttm_bo_put(bo);
> > +	ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk-
> > >arg, bo) {
> > +		lret = walk->ops->process_bo(walk, bo);
> >  		if (lret == -EBUSY || lret == -EALREADY)
> >  			lret = 0;
> >  		progress = (lret < 0) ? lret : progress + lret;
> > -
> > -		spin_lock(&bdev->lru_lock);
> >  		if (progress < 0 || progress >= target)
> >  			break;
> >  	}
> > -	ttm_resource_cursor_fini(&cursor);
> > -	spin_unlock(&bdev->lru_lock);
> > +	if (IS_ERR(bo))
> > +		return PTR_ERR(bo);
> >  
> >  	return progress;
> >  }
> > @@ -960,10 +910,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
> >   * @man: The ttm resource_manager whose LRU lists to iterate over.
> >   * @arg: The ttm_lru_walk_arg to govern the walk.
> >   *
> > - * Initialize a struct ttm_bo_lru_cursor. Currently only
> > trylocking
> > - * or prelocked buffer objects are available as detailed by
> > - * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is
> > not
> > - * supported.
> > + * Initialize a struct ttm_bo_lru_cursor.
> >   *
> >   * Return: Pointer to @curs. The function does not fail.
> >   */
> > @@ -981,21 +928,67 @@ ttm_bo_lru_cursor_init(struct
> > ttm_bo_lru_cursor *curs,
> >  EXPORT_SYMBOL(ttm_bo_lru_cursor_init);
> >  
> >  static struct ttm_buffer_object *
> > -ttm_bo_from_res_reserved(struct ttm_resource *res, struct
> > ttm_bo_lru_cursor *curs)
> > +__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
> >  {
> > -	struct ttm_buffer_object *bo = res->bo;
> > +	spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > +	struct ttm_resource *res = NULL;
> > +	struct ttm_buffer_object *bo;
> > +	struct ttm_lru_walk_arg *arg = curs->arg;
> > +	bool first = !curs->bo;
> >  
> > -	if (!ttm_lru_walk_trylock(curs->arg, bo, &curs-
> > >needs_unlock))
> > -		return NULL;
> > +	ttm_bo_lru_cursor_cleanup_bo(curs);
> >  
> > -	if (!ttm_bo_get_unless_zero(bo)) {
> > -		if (curs->needs_unlock)
> > -			dma_resv_unlock(bo->base.resv);
> > -		return NULL;
> > +	spin_lock(lru_lock);
> > +	for (;;) {
> > +		int mem_type, ret = 0;
> > +		bool bo_locked = false;
> > +
> > +		if (first) {
> > +			res = ttm_resource_manager_first(&curs-
> > >res_curs);
> > +			first = false;
> > +		} else {
> > +			res = ttm_resource_manager_next(&curs-
> > >res_curs);
> > +		}
> > +		if (!res)
> > +			break;
> > +
> > +		bo = res->bo;
> > +		if (ttm_lru_walk_trylock(curs, bo))
> > +			bo_locked = true;
> > +		else if (!arg->ticket || arg->ctx->no_wait_gpu ||
> > arg->trylock_only)
> > +			continue;
> > +
> > +		if (!ttm_bo_get_unless_zero(bo)) {
> > +			if (curs->needs_unlock)
> > +				dma_resv_unlock(bo->base.resv);
> > +			continue;
> > +		}
> > +
> > +		mem_type = res->mem_type;
> > +		spin_unlock(lru_lock);
> > +		if (!bo_locked)
> > +			ret = ttm_lru_walk_ticketlock(curs, bo);
> > +
> > +		/*
> > +		 * Note that in between the release of the lru
> > lock and the
> > +		 * ticketlock, the bo may have switched resource,
> > +		 * and also memory type, since the resource may
> > have been
> > +		 * freed and allocated again with a different
> > memory type.
> > +		 * In that case, just skip it.
> > +		 */
> > +		curs->bo = bo;
> > +		if (!ret && bo->resource && bo->resource->mem_type
> > == mem_type)
> > +			return bo;
> > +
> > +		ttm_bo_lru_cursor_cleanup_bo(curs);
> > +		if (ret && ret != -EALREADY)
> > +			return ERR_PTR(ret);
> > +
> > +		spin_lock(lru_lock);
> >  	}
> >  
> > -	curs->bo = bo;
> > -	return bo;
> > +	spin_unlock(lru_lock);
> > +	return res ? bo : NULL;
> >  }
> >  
> >  /**
> > @@ -1009,25 +1002,7 @@ ttm_bo_from_res_reserved(struct ttm_resource
> > *res, struct ttm_bo_lru_cursor *cur
> >   */
> >  struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct
> > ttm_bo_lru_cursor *curs)
> >  {
> > -	spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > -	struct ttm_resource *res = NULL;
> > -	struct ttm_buffer_object *bo;
> > -
> > -	ttm_bo_lru_cursor_cleanup_bo(curs);
> > -
> > -	spin_lock(lru_lock);
> > -	for (;;) {
> > -		res = ttm_resource_manager_next(&curs->res_curs);
> > -		if (!res)
> > -			break;
> > -
> > -		bo = ttm_bo_from_res_reserved(res, curs);
> > -		if (bo)
> > -			break;
> > -	}
> > -
> > -	spin_unlock(lru_lock);
> > -	return res ? bo : NULL;
> > +	return __ttm_bo_lru_cursor_next(curs);
> >  }
> >  EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
> >  
> > @@ -1041,21 +1016,8 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
> >   */
> >  struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct
> > ttm_bo_lru_cursor *curs)
> >  {
> > -	spinlock_t *lru_lock = &curs->res_curs.man->bdev-
> > >lru_lock;
> > -	struct ttm_buffer_object *bo;
> > -	struct ttm_resource *res;
> > -
> > -	spin_lock(lru_lock);
> > -	res = ttm_resource_manager_first(&curs->res_curs);
> > -	if (!res) {
> > -		spin_unlock(lru_lock);
> > -		return NULL;
> > -	}
> > -
> > -	bo = ttm_bo_from_res_reserved(res, curs);
> > -	spin_unlock(lru_lock);
> > -
> > -	return bo ? bo : ttm_bo_lru_cursor_next(curs);
> > +	ttm_bo_lru_cursor_cleanup_bo(curs);
> > +	return __ttm_bo_lru_cursor_next(curs);
> >  }
> >  EXPORT_SYMBOL(ttm_bo_lru_cursor_first);
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_shrinker.c
> > b/drivers/gpu/drm/xe/xe_shrinker.c
> > index f8a1129da2c3..1c3c04d52f55 100644
> > --- a/drivers/gpu/drm/xe/xe_shrinker.c
> > +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> > @@ -66,7 +66,10 @@ static s64 xe_shrinker_walk(struct xe_device
> > *xe,
> >  		struct ttm_resource_manager *man =
> > ttm_manager_type(&xe->ttm, mem_type);
> >  		struct ttm_bo_lru_cursor curs;
> >  		struct ttm_buffer_object *ttm_bo;
> > -		struct ttm_lru_walk_arg arg = {.ctx = ctx};
> > +		struct ttm_lru_walk_arg arg = {
> > +			.ctx = ctx,
> > +			.trylock_only = true,
> > +		};
> >  
> >  		if (!man || !man->use_tt)
> >  			continue;
> > @@ -83,6 +86,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
> >  			if (*scanned >= to_scan)
> >  				break;
> >  		}
> > +		/* Trylocks should never error, just fail. */
> > +		xe_assert(xe, !IS_ERR(ttm_bo));
> >  	}
> >  
> >  	return freed;
> > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> > index ab9a6b343a53..894ff7ccd68e 100644
> > --- a/include/drm/ttm/ttm_bo.h
> > +++ b/include/drm/ttm/ttm_bo.h
> > @@ -529,10 +529,15 @@
> > class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
> >   * up at looping termination, even if terminated prematurely by,
> > for
> >   * example a return or break statement. Exiting the loop will also
> > unlock
> >   * (if needed) and unreference @_bo.
> > + *
> > + * Return: If locking of a bo returns an error, then iteration is
> > terminated
> > + * and @_bo is set to a corresponding error pointer. It's illegal
> > to
> > + * dereference @_bo after loop exit.
> >   */
> >  #define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg,
> > _bo)	\
> >  	scoped_guard(ttm_bo_lru_cursor, _cursor, _man,
> > _arg)		\
> > -		for ((_bo) = ttm_bo_lru_cursor_first(_cursor);
> > (_bo);	\
> > -		     (_bo) = ttm_bo_lru_cursor_next(_cursor))
> > +		for ((_bo) =
> > ttm_bo_lru_cursor_first(_cursor);		\
> > +		      
> > !IS_ERR_OR_NULL(_bo);				\
> > +		       (_bo) = ttm_bo_lru_cursor_next(_cursor))
> >  
> >  #endif
> 



More information about the Intel-xe mailing list