[PATCH 6/6] drm/ttm: replace TTMs refcount with the DRM refcount
Thomas Hellström
thomas.hellstrom at linux.intel.com
Tue Jun 17 10:18:06 UTC 2025
Hi, Christian,
On Mon, 2025-06-16 at 15:07 +0200, Christian König wrote:
> Instead of keeping a separate reference count for the TTM object also
> use the reference count for DRM GEM objects inside TTM.
>
> Apart from avoiding two reference counts for one object this approach
> has the clear advantage of being able to use drm_exec inside TTM.
A couple of questions on the design direction here:
IIRC both xe and i915 have checks that treat objects with a 0 gem
refcount as zombies requiring special treatment or skipping when
encountered in TTM callbacks. We need to double-check that.
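The kind of check I have in mind is something along these lines
(paraphrased from memory, not the actual driver code):

	/* In a TTM callback such as eviction or swapout: */
	if (!kref_read(&bo->base.refcount)) {
		/*
		 * Zombie: only the backing store is left, skip the
		 * driver-side handling.
		 */
		return 0;
	}

Resurrecting the gem refcount for delayed delete would make zombies
invisible to such checks.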
But I wonder: first, this practice of resurrecting refcounts seems a
bit unusual; can we get rid of it somehow?
Furthermore, it seems the problem with drm_exec is related only to the
LRU walk. What about adding a struct completion to the object that is
signaled when the object has freed its final backing store? The LRU
walk would then check whether the object is a zombie, and if so just
wait on the struct completion. (We'd of course need to carefully set
up the locking orders.) Then we wouldn't need to resurrect the gem
refcount, nor use drm_exec locking for zombies. Roughly the sketch
below:
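(Very rough sketch of what I mean; the backing_freed member and the
function names are made up, and this glosses over dropping the LRU
lock before the wait.)

#include <linux/completion.h>

/* Delayed-delete side, once the final backing store is gone: */
static void ttm_bo_signal_backing_freed(struct ttm_buffer_object *bo)
{
	complete_all(&bo->backing_freed);
}

/* LRU walk side: */
static s64 ttm_bo_walk_cb(struct ttm_lru_walk *walk,
			  struct ttm_buffer_object *bo)
{
	if (ttm_bo_is_zombie(bo)) {
		/*
		 * No refcount resurrection and no drm_exec locking
		 * needed for zombies; just wait for the final free
		 * of the backing store.
		 */
		wait_for_completion(&bo->backing_freed);
		return 0;
	}

	/* ... normal eviction / swapout handling ... */
	return 0;
}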
We would still need some form of refcounting while waiting on the
struct completion, but if we restricted the TTM refcount to *only*
being used internally for that sole purpose, and also replaced the
final ttm_bo_put() with the ttm_bo_finalize() that you suggest, we
wouldn't need to resurrect that refcount, since it wouldn't drop to
zero until the object is ready for final free. Roughly as in the
second sketch below:
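(Again just a sketch with made-up names; internal_kref would only ever
be taken here and held by ttm_bo_finalize(), and ttm_bo_free_final()
stands in for whatever release callback does the final free.)

#include <linux/kref.h>

static void ttm_bo_wait_zombie(struct ttm_buffer_object *bo)
{
	/*
	 * The internal refcount only keeps the zombie alive across
	 * the wait. ttm_bo_finalize() holds the last reference until
	 * the object is ready for final free, so this count never
	 * drops to zero early and never needs to be resurrected.
	 */
	if (!kref_get_unless_zero(&bo->internal_kref))
		return;

	wait_for_completion(&bo->backing_freed);
	kref_put(&bo->internal_kref, ttm_bo_free_final);
}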
Ideas, comments?
Thomas
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 4 +-
> drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c | 2 -
> drivers/gpu/drm/ttm/ttm_bo.c | 146 +++++++++---------
> drivers/gpu/drm/ttm/ttm_bo_internal.h | 9 +-
> drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +-
> include/drm/ttm/ttm_bo.h | 9 --
> 6 files changed, 81 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> index 6766e1753343..7b908920aae5 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
> @@ -127,7 +127,7 @@ static void ttm_bo_init_reserved_sys_man(struct kunit *test)
> dma_resv_unlock(bo->base.resv);
>
> KUNIT_EXPECT_EQ(test, err, 0);
> - KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> + KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
> KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
> KUNIT_EXPECT_EQ(test, bo->type, bo_type);
> KUNIT_EXPECT_EQ(test, bo->page_alignment, PAGE_SIZE);
> @@ -176,7 +176,7 @@ static void ttm_bo_init_reserved_mock_man(struct kunit *test)
> dma_resv_unlock(bo->base.resv);
>
> KUNIT_EXPECT_EQ(test, err, 0);
> - KUNIT_EXPECT_EQ(test, kref_read(&bo->kref), 1);
> + KUNIT_EXPECT_EQ(test, kref_read(&bo->base.refcount), 1);
> KUNIT_EXPECT_PTR_EQ(test, bo->bdev, priv->ttm_dev);
> KUNIT_EXPECT_EQ(test, bo->type, bo_type);
> KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
> diff --git a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> index b91c13f46225..7bc8eae77b8c 100644
> --- a/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> +++ b/drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c
> @@ -190,8 +190,6 @@ struct ttm_buffer_object *ttm_bo_kunit_init(struct kunit *test,
> bo->bdev = devs->ttm_dev;
> bo->destroy = dummy_ttm_bo_destroy;
>
> - kref_init(&bo->kref);
> -
> return bo;
> }
> EXPORT_SYMBOL_GPL(ttm_bo_kunit_init);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index abcf45bc3930..071cbad2fe9e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -48,14 +48,6 @@
> #include "ttm_module.h"
> #include "ttm_bo_internal.h"
>
> -static void ttm_bo_release(struct kref *kref);
> -
> -/* TODO: remove! */
> -void ttm_bo_put(struct ttm_buffer_object *bo)
> -{
> - kref_put(&bo->kref, ttm_bo_release);
> -}
> -
> static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
> struct ttm_placement *placement)
> {
> @@ -252,82 +244,91 @@ static void ttm_bo_delayed_delete(struct work_struct *work)
> ttm_bo_put(bo);
> }
>
> -static void ttm_bo_release(struct kref *kref)
> +static void ttm_bo_free(struct drm_gem_object *gobj)
> +{
> + struct ttm_buffer_object *bo = container_of(gobj, typeof(*bo), base);
> +
> + atomic_dec(&ttm_glob.bo_count);
> + bo->destroy(bo);
> +}
> +
> +/*
> + * All other callbacks should never ever be called on a deleted TTM object.
> + */
> +static const struct drm_gem_object_funcs ttm_deleted_object_funcs = {
> + .free = ttm_bo_free
> +};
> +
> +/* Returns true if the BO is about to get deleted */
> +static bool ttm_bo_is_zombie(struct ttm_buffer_object *bo)
> +{
> + return bo->base.funcs == &ttm_deleted_object_funcs;
> +}
> +
> +void ttm_bo_fini(struct ttm_buffer_object *bo)
> {
> - struct ttm_buffer_object *bo =
> - container_of(kref, struct ttm_buffer_object, kref);
> struct ttm_device *bdev = bo->bdev;
> int ret;
>
> WARN_ON_ONCE(bo->pin_count);
> WARN_ON_ONCE(bo->bulk_move);
>
> - if (!bo->deleted) {
> - ret = ttm_bo_individualize_resv(bo);
> - if (ret) {
> - /* Last resort, if we fail to allocate memory for the
> - * fences block for the BO to become idle
> - */
> - dma_resv_wait_timeout(bo->base.resv,
> - DMA_RESV_USAGE_BOOKKEEP, false,
> - 30 * HZ);
> - }
> + ret = ttm_bo_individualize_resv(bo);
> + if (ret) {
> + /* Last resort, if we fail to allocate memory for the
> + * fences block for the BO to become idle
> + */
> + dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
> + false, 30 * HZ);
> + }
>
> - if (bo->bdev->funcs->release_notify)
> - bo->bdev->funcs->release_notify(bo);
> -
> - drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> - ttm_mem_io_free(bdev, bo->resource);
> -
> - if (!dma_resv_test_signaled(&bo->base._resv,
> - DMA_RESV_USAGE_BOOKKEEP) ||
> - (want_init_on_free() && (bo->ttm != NULL)) ||
> - bo->type == ttm_bo_type_sg ||
> - !dma_resv_trylock(bo->base.resv)) {
> - /* The BO is not idle, resurrect it for delayed destroy */
> - ttm_bo_flush_all_fences(bo);
> - bo->deleted = true;
> -
> - spin_lock(&bo->bdev->lru_lock);
> -
> - /*
> - * Make pinned bos immediately available to
> - * shrinkers, now that they are queued for
> - * destruction.
> - *
> - * FIXME: QXL is triggering this. Can be removed when the
> - * driver is fixed.
> - */
> - if (bo->pin_count) {
> - bo->pin_count = 0;
> - ttm_resource_move_to_lru_tail(bo->resource);
> - }
> + if (bo->bdev->funcs->release_notify)
> + bo->bdev->funcs->release_notify(bo);
> +
> + drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> + ttm_mem_io_free(bdev, bo->resource);
>
> - kref_init(&bo->kref);
> - spin_unlock(&bo->bdev->lru_lock);
> + if (!dma_resv_test_signaled(&bo->base._resv, DMA_RESV_USAGE_BOOKKEEP) ||
> + (want_init_on_free() && (bo->ttm != NULL)) ||
> + bo->type == ttm_bo_type_sg ||
> + !dma_resv_trylock(bo->base.resv)) {
> + /* The BO is not idle, resurrect it for delayed destroy */
> + ttm_bo_flush_all_fences(bo);
>
> - INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> + spin_lock(&bo->bdev->lru_lock);
>
> - /* Schedule the worker on the closest NUMA node. This
> - * improves performance since system memory might be
> - * cleared on free and that is best done on a CPU core
> - * close to it.
> - */
> - queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> - return;
> + /*
> + * Make pinned bos immediately available to
> + * shrinkers, now that they are queued for
> + * destruction.
> + *
> + * FIXME: QXL is triggering this. Can be removed when the
> + * driver is fixed.
> + */
> + if (bo->pin_count) {
> + bo->pin_count = 0;
> + ttm_resource_move_to_lru_tail(bo->resource);
> }
>
> + kref_init(&bo->base.refcount);
> + bo->base.funcs = &ttm_deleted_object_funcs;
> + spin_unlock(&bo->bdev->lru_lock);
> +
> + INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
> +
> + /* Schedule the worker on the closest NUMA node. This
> + * improves performance since system memory might be
> + * cleared on free and that is best done on a CPU core
> + * close to it.
> + */
> + queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
> + } else {
> ttm_bo_cleanup_memtype_use(bo);
> dma_resv_unlock(bo->base.resv);
> - }
>
> - atomic_dec(&ttm_glob.bo_count);
> - bo->destroy(bo);
> -}
> -
> -void ttm_bo_fini(struct ttm_buffer_object *bo)
> -{
> - ttm_bo_put(bo);
> + atomic_dec(&ttm_glob.bo_count);
> + bo->destroy(bo);
> + }
> }
> EXPORT_SYMBOL(ttm_bo_fini);
>
> @@ -471,7 +472,7 @@ int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man
> if (!bo->resource || bo->resource->mem_type != mem_type)
> goto out_bo_moved;
>
> - if (bo->deleted) {
> + if (ttm_bo_is_zombie(bo)) {
> ret = ttm_bo_wait_ctx(bo, ctx);
> if (!ret)
> ttm_bo_cleanup_memtype_use(bo);
> @@ -525,7 +526,7 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *
> if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo,
> evict_walk->place))
> return 0;
>
> - if (bo->deleted) {
> + if (ttm_bo_is_zombie(bo)) {
> lret = ttm_bo_wait_ctx(bo, walk->ctx);
> if (!lret)
> ttm_bo_cleanup_memtype_use(bo);
> @@ -623,7 +624,6 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
> void ttm_bo_pin(struct ttm_buffer_object *bo)
> {
> dma_resv_assert_held(bo->base.resv);
> - WARN_ON_ONCE(!kref_read(&bo->kref));
> spin_lock(&bo->bdev->lru_lock);
> if (bo->resource)
> ttm_resource_del_bulk_move(bo->resource, bo);
> @@ -642,7 +642,6 @@ EXPORT_SYMBOL(ttm_bo_pin);
> void ttm_bo_unpin(struct ttm_buffer_object *bo)
> {
> dma_resv_assert_held(bo->base.resv);
> - WARN_ON_ONCE(!kref_read(&bo->kref));
> if (WARN_ON_ONCE(!bo->pin_count))
> return;
>
> @@ -931,7 +930,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
> {
> int ret;
>
> - kref_init(&bo->kref);
> bo->bdev = bdev;
> bo->type = type;
> bo->page_alignment = alignment;
> @@ -1127,7 +1125,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
> goto out;
> }
>
> - if (bo->deleted) {
> + if (ttm_bo_is_zombie(bo)) {
> pgoff_t num_pages = bo->ttm->num_pages;
>
> ret = ttm_bo_wait_ctx(bo, ctx);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_internal.h b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> index e0d48eac74b0..81327bc73834 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_internal.h
> +++ b/drivers/gpu/drm/ttm/ttm_bo_internal.h
> @@ -34,7 +34,7 @@
> */
> static inline void ttm_bo_get(struct ttm_buffer_object *bo)
> {
> - kref_get(&bo->kref);
> + drm_gem_object_get(&bo->base);
> }
>
> /**
> @@ -50,11 +50,14 @@ static inline void ttm_bo_get(struct ttm_buffer_object *bo)
> static inline __must_check struct ttm_buffer_object *
> ttm_bo_get_unless_zero(struct ttm_buffer_object *bo)
> {
> - if (!kref_get_unless_zero(&bo->kref))
> + if (!kref_get_unless_zero(&bo->base.refcount))
> return NULL;
> return bo;
> }
>
> -void ttm_bo_put(struct ttm_buffer_object *bo);
> +static inline void ttm_bo_put(struct ttm_buffer_object *bo)
> +{
> + drm_gem_object_put(&bo->base);
> +}
>
> #endif
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 56f3152f34f5..56645039757e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -245,7 +245,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> atomic_inc(&ttm_glob.bo_count);
> drm_vma_node_reset(&fbo->base.base.vma_node);
>
> - kref_init(&fbo->base.kref);
> + kref_init(&fbo->base.base.refcount);
> fbo->base.destroy = &ttm_transfered_destroy;
> fbo->base.pin_count = 0;
> if (bo->type != ttm_bo_type_sg)
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 4b0552d1bc55..4fe4031f0165 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -78,11 +78,8 @@ enum ttm_bo_type {
> * @type: The bo type.
> * @page_alignment: Page alignment.
> * @destroy: Destruction function. If NULL, kfree is used.
> - * @kref: Reference count of this buffer object. When this refcount reaches
> - * zero, the object is destroyed or put on the delayed delete list.
> * @resource: structure describing current placement.
> * @ttm: TTM structure holding system pages.
> - * @deleted: True if the object is only a zombie and already deleted.
> * @bulk_move: The bulk move object.
> - * @priority: Priority for LRU, BOs with lower priority are evicted first.
> * @pin_count: Pin count.
> @@ -109,17 +106,11 @@ struct ttm_buffer_object {
> uint32_t page_alignment;
> void (*destroy) (struct ttm_buffer_object *);
>
> - /*
> - * Members not needing protection.
> - */
> - struct kref kref;
> -
> /*
> * Members protected by the bo::resv::reserved lock.
> */
> struct ttm_resource *resource;
> struct ttm_tt *ttm;
> - bool deleted;
> struct ttm_lru_bulk_move *bulk_move;
> unsigned priority;
> unsigned pin_count;