[PATCH 1/8] drm/ttm: Allow TTM LRU list nodes of different types

Thomas Hellström thomas.hellstrom at linux.intel.com
Mon Apr 8 12:45:18 UTC 2024


Hi, Christian.
Thanks for reviewing. 

On Fri, 2024-04-05 at 14:34 +0200, Christian König wrote:
> Am 29.03.24 um 15:57 schrieb Thomas Hellström:
> > To be able to handle list unlocking while traversing the LRU
> > list, we want the iterators not only to point to the next
> > position of the list traversal, but to insert themselves as
> > list nodes at that point to work around the fact that the
> > next node might otherwise disappear from the list while
> > the iterator is pointing to it.
> > 
> > These list nodes need to be easily distinguishable from other
> > list nodes so that others traversing the list can skip
> > over them.
> > 
> > So declare a struct ttm_lru_item, with a struct list_head member
> > and a type enum. This will slightly increase the size of a
> > struct ttm_resource.
> > 
> > v2:
> > - Update enum ttm_lru_item_type documentation.
> > 
> > Cc: Christian König <christian.koenig at amd.com>
> > Cc: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> > Cc: <dri-devel at lists.freedesktop.org>
> > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_device.c   | 13 ++++--
> >   drivers/gpu/drm/ttm/ttm_resource.c | 70 ++++++++++++++++++++++---
> > -----
> >   include/drm/ttm/ttm_resource.h     | 51 +++++++++++++++++++++-
> >   3 files changed, 110 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_device.c
> > b/drivers/gpu/drm/ttm/ttm_device.c
> > index 76027960054f..f27406e851e5 100644
> > --- a/drivers/gpu/drm/ttm/ttm_device.c
> > +++ b/drivers/gpu/drm/ttm/ttm_device.c
> > @@ -270,17 +270,22 @@ EXPORT_SYMBOL(ttm_device_fini);
> >   static void ttm_device_clear_lru_dma_mappings(struct ttm_device
> > *bdev,
> >   					      struct list_head
> > *list)
> >   {
> > -	struct ttm_resource *res;
> > +	struct ttm_lru_item *lru;
> >   
> >   	spin_lock(&bdev->lru_lock);
> > -	while ((res = list_first_entry_or_null(list, typeof(*res),
> > lru))) {
> > -		struct ttm_buffer_object *bo = res->bo;
> > +	while ((lru = list_first_entry_or_null(list, typeof(*lru),
> > link))) {
> > +		struct ttm_buffer_object *bo;
> > +
> > +		if (!ttm_lru_item_is_res(lru))
> > +			continue;
> 
> We should probably define some ttm_lru_for_each_res() and 
> ttm_lru_for_each_res_safe() to iterate over the LRU and abstract the 
> whole thing much better.

We actually have that already, ttm_resource_manager_for_each_res(), and
later in the series a more elaborate lru list walker that also takes
the object lock based on how the struct ttm_operation_context is set
up. (xe_ttm_lru_walk_for_evict())

At this stage, unless done like above, the list traversal isn't safe
against unlocking the lru list, so any change here IMO needs to be done
as a follow up patch.

> 
> 
> 
> > +
> > +		bo = ttm_lru_item_to_res(lru)->bo;
> >   
> >   		/* Take ref against racing releases once lru_lock
> > is unlocked */
> >   		if (!ttm_bo_get_unless_zero(bo))
> >   			continue;
> >   
> > -		list_del_init(&res->lru);
> > +		list_del_init(&bo->resource->lru.link);
> >   		spin_unlock(&bdev->lru_lock);
> >   
> >   		if (bo->ttm)
> > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c
> > b/drivers/gpu/drm/ttm/ttm_resource.c
> > index be8d286513f9..7aa5ca5c0e33 100644
> > --- a/drivers/gpu/drm/ttm/ttm_resource.c
> > +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> > @@ -69,8 +69,8 @@ void ttm_lru_bulk_move_tail(struct
> > ttm_lru_bulk_move *bulk)
> >   			dma_resv_assert_held(pos->last->bo-
> > >base.resv);
> >   
> >   			man = ttm_manager_type(pos->first->bo-
> > >bdev, i);
> > -			list_bulk_move_tail(&man->lru[j], &pos-
> > >first->lru,
> > -					    &pos->last->lru);
> > +			list_bulk_move_tail(&man->lru[j], &pos-
> > >first->lru.link,
> > +					    &pos->last->lru.link);
> >   		}
> >   	}
> >   }
> > @@ -83,14 +83,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move
> > *bulk, struct ttm_resource *res)
> >   	return &bulk->pos[res->mem_type][res->bo->priority];
> >   }
> >   
> > +/* Return the previous resource on the list (skip over non-
> > resource list items) */
> > +static struct ttm_resource *ttm_lru_prev_res(struct ttm_resource
> > *cur)
> > +{
> > +	struct ttm_lru_item *lru = &cur->lru;
> > +
> > +	do {
> > +		lru = list_prev_entry(lru, link);
> > +	} while (!ttm_lru_item_is_res(lru));
> > +
> > +	return ttm_lru_item_to_res(lru);
> > +}
> > +
> > +/* Return the next resource on the list (skip over non-resource
> > list items) */
> > +static struct ttm_resource *ttm_lru_next_res(struct ttm_resource
> > *cur)
> > +{
> > +	struct ttm_lru_item *lru = &cur->lru;
> > +
> > +	do {
> > +		lru = list_next_entry(lru, link);
> > +	} while (!ttm_lru_item_is_res(lru));
> > +
> > +	return ttm_lru_item_to_res(lru);
> > +}
> > +
> >   /* Move the resource to the tail of the bulk move range */
> >   static void ttm_lru_bulk_move_pos_tail(struct
> > ttm_lru_bulk_move_pos *pos,
> >   				       struct ttm_resource *res)
> >   {
> >   	if (pos->last != res) {
> >   		if (pos->first == res)
> > -			pos->first = list_next_entry(res, lru);
> > -		list_move(&res->lru, &pos->last->lru);
> > +			pos->first = ttm_lru_next_res(res);
> > +		list_move(&res->lru.link, &pos->last->lru.link);
> >   		pos->last = res;
> >   	}
> >   }
> > @@ -121,11 +145,11 @@ static void ttm_lru_bulk_move_del(struct
> > ttm_lru_bulk_move *bulk,
> >   		pos->first = NULL;
> >   		pos->last = NULL;
> >   	} else if (pos->first == res) {
> > -		pos->first = list_next_entry(res, lru);
> > +		pos->first = ttm_lru_next_res(res);
> >   	} else if (pos->last == res) {
> > -		pos->last = list_prev_entry(res, lru);
> > +		pos->last = ttm_lru_prev_res(res);
> >   	} else {
> > -		list_move(&res->lru, &pos->last->lru);
> > +		list_move(&res->lru.link, &pos->last->lru.link);
> >   	}
> >   }
> >   
> > @@ -154,7 +178,7 @@ void ttm_resource_move_to_lru_tail(struct
> > ttm_resource *res)
> >   	lockdep_assert_held(&bo->bdev->lru_lock);
> >   
> >   	if (bo->pin_count) {
> > -		list_move_tail(&res->lru, &bdev->pinned);
> > +		list_move_tail(&res->lru.link, &bdev->pinned);
> >   
> >   	} else	if (bo->bulk_move) {
> >   		struct ttm_lru_bulk_move_pos *pos =
> > @@ -165,7 +189,7 @@ void ttm_resource_move_to_lru_tail(struct
> > ttm_resource *res)
> >   		struct ttm_resource_manager *man;
> >   
> >   		man = ttm_manager_type(bdev, res->mem_type);
> > -		list_move_tail(&res->lru, &man->lru[bo-
> > >priority]);
> > +		list_move_tail(&res->lru.link, &man->lru[bo-
> > >priority]);
> >   	}
> >   }
> >   
> > @@ -196,9 +220,9 @@ void ttm_resource_init(struct ttm_buffer_object
> > *bo,
> >   	man = ttm_manager_type(bo->bdev, place->mem_type);
> >   	spin_lock(&bo->bdev->lru_lock);
> >   	if (bo->pin_count)
> > -		list_add_tail(&res->lru, &bo->bdev->pinned);
> > +		list_add_tail(&res->lru.link, &bo->bdev->pinned);
> >   	else
> > -		list_add_tail(&res->lru, &man->lru[bo->priority]);
> > +		list_add_tail(&res->lru.link, &man->lru[bo-
> > >priority]);
> >   	man->usage += res->size;
> >   	spin_unlock(&bo->bdev->lru_lock);
> >   }
> > @@ -220,7 +244,7 @@ void ttm_resource_fini(struct
> > ttm_resource_manager *man,
> >   	struct ttm_device *bdev = man->bdev;
> >   
> >   	spin_lock(&bdev->lru_lock);
> > -	list_del_init(&res->lru);
> > +	list_del_init(&res->lru.link);
> >   	man->usage -= res->size;
> >   	spin_unlock(&bdev->lru_lock);
> >   }
> > @@ -471,14 +495,16 @@ struct ttm_resource *
> >   ttm_resource_manager_first(struct ttm_resource_manager *man,
> >   			   struct ttm_resource_cursor *cursor)
> >   {
> > -	struct ttm_resource *res;
> > +	struct ttm_lru_item *lru;
> >   
> >   	lockdep_assert_held(&man->bdev->lru_lock);
> >   
> >   	for (cursor->priority = 0; cursor->priority <
> > TTM_MAX_BO_PRIORITY;
> >   	     ++cursor->priority)
> > -		list_for_each_entry(res, &man->lru[cursor-
> > >priority], lru)
> > -			return res;
> > +		list_for_each_entry(lru, &man->lru[cursor-
> > >priority], link) {
> > +			if (ttm_lru_item_is_res(lru))
> > +				return ttm_lru_item_to_res(lru);
> > +		}
> >   
> >   	return NULL;
> >   }
> > @@ -497,15 +523,21 @@ ttm_resource_manager_next(struct
> > ttm_resource_manager *man,
> >   			  struct ttm_resource_cursor *cursor,
> >   			  struct ttm_resource *res)
> >   {
> > +	struct ttm_lru_item *lru = &res->lru;
> > +
> >   	lockdep_assert_held(&man->bdev->lru_lock);
> >   
> > -	list_for_each_entry_continue(res, &man->lru[cursor-
> > >priority], lru)
> > -		return res;
> > +	list_for_each_entry_continue(lru, &man->lru[cursor-
> > >priority], link) {
> > +		if (ttm_lru_item_is_res(lru))
> > +			return ttm_lru_item_to_res(lru);
> > +	}
> >   
> >   	for (++cursor->priority; cursor->priority <
> > TTM_MAX_BO_PRIORITY;
> >   	     ++cursor->priority)
> > -		list_for_each_entry(res, &man->lru[cursor-
> > >priority], lru)
> > -			return res;
> > +		list_for_each_entry(lru, &man->lru[cursor-
> > >priority], link) {
> > +			if (ttm_lru_item_is_res(lru))
> > +				ttm_lru_item_to_res(lru);
> > +		}
> >   
> >   	return NULL;
> >   }
> > diff --git a/include/drm/ttm/ttm_resource.h
> > b/include/drm/ttm/ttm_resource.h
> > index 69769355139f..4babc4ff10b0 100644
> > --- a/include/drm/ttm/ttm_resource.h
> > +++ b/include/drm/ttm/ttm_resource.h
> > @@ -49,6 +49,43 @@ struct io_mapping;
> >   struct sg_table;
> >   struct scatterlist;
> >   
> > +/**
> > + * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
> > + */
> > +enum ttm_lru_item_type {
> > +	/** @TTM_LRU_RESOURCE: The resource subclass */
> > +	TTM_LRU_RESOURCE,
> > +	/** @TTM_LRU_HITCH: The iterator hitch subclass */
> > +	TTM_LRU_HITCH
> 
> I'm really wondering we we can't use a special value for mem_type for
> the iterator hitch instead.

We can use a special TTM resource. I was thinking back and forth about
this for the initial series. But landed on this solution for two
reasons:

1) Code separation: I think we want to make the lru traversal self-
contained if possible. (Thinking forward about Oak's work on splitting
out the TTM LRU traversal) and future possible merging with the drm gem
LRU traversal.

2) Maintainability. If we (ab)use the ttm resources as hitches, this
might be confusing to people reading the code.

I don't have a very strong opinion on this, though. If you think
otherwise we can embed a struct ttm_resource in the cursor.

/Thomas
 


> 
> Regards,
> Christian.
> 
> > +};
> > +
> > +/**
> > + * struct ttm_lru_item - The TTM lru list node base class
> > + * @link: The list link
> > + * @type: The subclass type
> > + */
> > +struct ttm_lru_item {
> > +	struct list_head link;
> > +	enum ttm_lru_item_type type;
> > +};
> > +
> > +/**
> > + * ttm_lru_item_init() - initialize a struct ttm_lru_item
> > + * @item: The item to initialize
> > + * @type: The subclass type
> > + */
> > +static inline void ttm_lru_item_init(struct ttm_lru_item *item,
> > +				     enum ttm_lru_item_type type)
> > +{
> > +	item->type = type;
> > +	INIT_LIST_HEAD(&item->link);
> > +}
> > +
> > +static inline bool ttm_lru_item_is_res(const struct ttm_lru_item
> > *item)
> > +{
> > +	return item->type == TTM_LRU_RESOURCE;
> > +}
> > +
> >   struct ttm_resource_manager_func {
> >   	/**
> >   	 * struct ttm_resource_manager_func member alloc
> > @@ -217,9 +254,21 @@ struct ttm_resource {
> >   	/**
> >   	 * @lru: Least recently used list, see
> > &ttm_resource_manager.lru
> >   	 */
> > -	struct list_head lru;
> > +	struct ttm_lru_item lru;
> >   };
> >   
> > +/**
> > + * ttm_lru_item_to_res() - Downcast a struct ttm_lru_item to a
> > struct ttm_resource
> > + * @item: The struct ttm_lru_item to downcast
> > + *
> > + * Return: Pointer to the embedding struct ttm_resource
> > + */
> > +static inline struct ttm_resource *
> > +ttm_lru_item_to_res(struct ttm_lru_item *item)
> > +{
> > +	return container_of(item, struct ttm_resource, lru);
> > +}
> > +
> >   /**
> >    * struct ttm_resource_cursor
> >    *
> 



More information about the dri-devel mailing list