[RFC PATCH] drm/ttm: Simplify the delayed destroy locking

Christian König christian.koenig at amd.com
Mon Apr 12 14:01:48 UTC 2021


Hi Thomas,

well, in general a good idea, but I've been working on a different plan 
for a while now.

My idea here is that instead of the BO, the resource object is kept on a 
doubly linked LRU list.

The resource objects then have a pointer to either the BO or a fence object.

When it is a fence object, we can just grab a reference to it and wait 
for it to complete.

If it is a BO, we evict it the same way we currently do.

This allows us to remove the delayed delete, the individualization of BOs, 
ghost objects, etc.
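
Very roughly, something like this; all names and fields below are just 
made up for illustration, nothing of that exists yet:

struct ttm_resource {
	struct list_head lru;		/* per-manager LRU link */
	struct ttm_buffer_object *bo;	/* NULL once the BO is destroyed */
	struct dma_fence *free_fence;	/* set instead of bo on destroy */
	/* ... */
};

static int ttm_resource_evict(struct ttm_resource *res,
			      struct ttm_operation_ctx *ctx)
{
	if (res->free_fence) {
		/* The BO is already gone, just wait for the fence. */
		struct dma_fence *fence = dma_fence_get(res->free_fence);
		long lret;

		lret = dma_fence_wait_timeout(fence, ctx->interruptible,
					      30 * HZ);
		dma_fence_put(fence);
		if (lret < 0)
			return lret;
		return lret ? 0 : -EBUSY;
	}

	/* Otherwise evict the BO the same way we do today. */
	return ttm_bo_evict(res->bo, ctx);
}

That way the fence simply outlives the BO, so no ghost objects and no 
resv individualization are needed any more.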

Regards,
Christian.

On 12.04.21 at 15:17, Thomas Hellström wrote:
> This RFC needs some decent testing on a driver with bos that share
> reservation objects, and of course a check for whether I missed
> something obvious.
>
> The locking around delayed destroy is rather complex due to the fact
> that we want to individualize dma_resv pointers before putting the object
> on the delayed-destroy list. That individualization is currently protected
> by the lru lock, forcing us to do try-reservations under the lru lock when
> reserving an object from the lru- or ddestroy lists, which complicates
> moving over to per-manager lru locks.
>
> Instead, protect the individualization with the object kref == 0,
> and ensure kref != 0 before trying to reserve an object from the list.
> Remove the lru lock around individualization and protect the delayed
> destroy list with a separate spinlock. Isolate the delayed destroy list
> similarly to a lockless list before traversing it. Also don't try to drop
> resource references from the delayed destroy list traversal, but leave
> that to the final object release. The role of the delayed destroy thread
> will now simply be to try to idle the object and, when idle, drop
> the delayed destroy reference.
>
> This should pave the way for per-manager lru lists, sleeping eviction locks
> that are kept, optional drm_mm eviction roster usage, and moving over to a
> completely lockless delayed destroy list, even if the traversal order
> will then change because there is no llist_add_tail().
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c     | 193 +++++++++++++------------------
>   drivers/gpu/drm/ttm/ttm_device.c |   1 +
>   include/drm/ttm/ttm_device.h     |   1 +
>   3 files changed, 82 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 6ab7b66ce36d..fd57252bc8fe 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -217,6 +217,9 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
>   
>   static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
>   {
> +	if (kref_read(&bo->kref))
> +		dma_resv_assert_held(bo->base.resv);
> +
>   	if (bo->bdev->funcs->delete_mem_notify)
>   		bo->bdev->funcs->delete_mem_notify(bo);
>   
> @@ -239,13 +242,18 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>   		return r;
>   
>   	if (bo->type != ttm_bo_type_sg) {
> -		/* This works because the BO is about to be destroyed and nobody
> -		 * reference it any more. The only tricky case is the trylock on
> -		 * the resv object while holding the lru_lock.
> +		/*
> +		 * This works because nobody besides us can lock the bo or
> +		 * assume it is locked while its refcount is zero.
>   		 */
> -		spin_lock(&bo->bdev->lru_lock);
> +		WARN_ON_ONCE(kref_read(&bo->kref));
>   		bo->base.resv = &bo->base._resv;
> -		spin_unlock(&bo->bdev->lru_lock);
> +		/*
> +		 * Don't reorder against kref_init().
> +		 * Matches the implicit full barrier of a successful
> +		 * kref_get_unless_zero() after a lru_list_lookup.
> +		 */
> +		smp_mb();
>   	}
>   
>   	return r;
> @@ -274,122 +282,66 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>   }
>   
>   /**
> - * function ttm_bo_cleanup_refs
> - * If bo idle, remove from lru lists, and unref.
> - * If not idle, block if possible.
> - *
> - * Must be called with lru_lock and reservation held, this function
> - * will drop the lru lock and optionally the reservation lock before returning.
> + * ttm_bo_cleanup_refs - idle and remove pages and gpu-bindings and remove
> + * from lru_lists.
>    *
>    * @bo:                    The buffer object to clean-up
>    * @interruptible:         Any sleeps should occur interruptibly.
> - * @no_wait_gpu:           Never wait for gpu. Return -EBUSY instead.
> - * @unlock_resv:           Unlock the reservation lock as well.
> + * @no_wait_gpu:           Never wait for gpu. Return -EBUSY if not idle.
>    */
> -
>   static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> -			       bool interruptible, bool no_wait_gpu,
> -			       bool unlock_resv)
> +			       bool interruptible, bool no_wait_gpu)
>   {
> -	struct dma_resv *resv = &bo->base._resv;
>   	int ret;
>   
> -	if (dma_resv_test_signaled_rcu(resv, true))
> -		ret = 0;
> -	else
> -		ret = -EBUSY;
> -
> -	if (ret && !no_wait_gpu) {
> -		long lret;
> -
> -		if (unlock_resv)
> -			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&bo->bdev->lru_lock);
> -
> -		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> -						 30 * HZ);
> -
> -		if (lret < 0)
> -			return lret;
> -		else if (lret == 0)
> -			return -EBUSY;
> -
> -		spin_lock(&bo->bdev->lru_lock);
> -		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> -			/*
> -			 * We raced, and lost, someone else holds the reservation now,
> -			 * and is probably busy in ttm_bo_cleanup_memtype_use.
> -			 *
> -			 * Even if it's not the case, because we finished waiting any
> -			 * delayed destruction would succeed, so just return success
> -			 * here.
> -			 */
> -			spin_unlock(&bo->bdev->lru_lock);
> -			return 0;
> -		}
> -		ret = 0;
> -	}
> -
> -	if (ret || unlikely(list_empty(&bo->ddestroy))) {
> -		if (unlock_resv)
> -			dma_resv_unlock(bo->base.resv);
> -		spin_unlock(&bo->bdev->lru_lock);
> +	dma_resv_assert_held(bo->base.resv);
> +	WARN_ON_ONCE(!bo->deleted);
> +	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
> +	if (ret)
>   		return ret;
> -	}
>   
> +	ttm_bo_cleanup_memtype_use(bo);
> +	spin_lock(&bo->bdev->lru_lock);
>   	ttm_bo_del_from_lru(bo);
> -	list_del_init(&bo->ddestroy);
>   	spin_unlock(&bo->bdev->lru_lock);
> -	ttm_bo_cleanup_memtype_use(bo);
> -
> -	if (unlock_resv)
> -		dma_resv_unlock(bo->base.resv);
> -
> -	ttm_bo_put(bo);
>   
>   	return 0;
>   }
>   
>   /*
> - * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
> - * encountered buffers.
> + * Isolate a part of the delayed destroy list and check for idle on all
> + * buffer objects on it. If idle, remove from the list and drop the
> + * delayed destroy refcount. Return all busy buffers to the delayed
> + * destroy list.
>    */
>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>   {
> -	struct list_head removed;
> -	bool empty;
> -
> -	INIT_LIST_HEAD(&removed);
> -
> -	spin_lock(&bdev->lru_lock);
> -	while (!list_empty(&bdev->ddestroy)) {
> -		struct ttm_buffer_object *bo;
> -
> -		bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
> -				      ddestroy);
> -		list_move_tail(&bo->ddestroy, &removed);
> -		if (!ttm_bo_get_unless_zero(bo))
> -			continue;
> -
> -		if (remove_all || bo->base.resv != &bo->base._resv) {
> -			spin_unlock(&bdev->lru_lock);
> -			dma_resv_lock(bo->base.resv, NULL);
> -
> -			spin_lock(&bdev->lru_lock);
> -			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> -
> -		} else if (dma_resv_trylock(bo->base.resv)) {
> -			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> -		} else {
> -			spin_unlock(&bdev->lru_lock);
> +	struct ttm_buffer_object *bo, *next;
> +	struct list_head isolated;
> +	bool empty, idle;
> +
> +	INIT_LIST_HEAD(&isolated);
> +
> +	spin_lock(&bdev->ddestroy_lock);
> +	list_splice_init(&bdev->ddestroy, &isolated);
> +	spin_unlock(&bdev->ddestroy_lock);
> +	list_for_each_entry_safe(bo, next, &isolated, ddestroy) {
> +		WARN_ON_ONCE(!kref_read(&bo->kref) ||
> +			     bo->base.resv != &bo->base._resv);
> +		if (!remove_all)
> +			idle = dma_resv_test_signaled_rcu(bo->base.resv, true);
> +		else
> +			idle = (dma_resv_wait_timeout_rcu(bo->base.resv, true,
> +							  false, 30 * HZ) > 0);
> +		if (idle) {
> +			list_del_init(&bo->ddestroy);
> +			ttm_bo_put(bo);
>   		}
> -
> -		ttm_bo_put(bo);
> -		spin_lock(&bdev->lru_lock);
>   	}
> -	list_splice_tail(&removed, &bdev->ddestroy);
> +	spin_lock(&bdev->ddestroy_lock);
> +	list_splice(&isolated, &bdev->ddestroy);
>   	empty = list_empty(&bdev->ddestroy);
> -	spin_unlock(&bdev->lru_lock);
> +	spin_unlock(&bdev->ddestroy_lock);
>   
>   	return empty;
>   }
> @@ -405,10 +357,16 @@ static void ttm_bo_release(struct kref *kref)
>   		ret = ttm_bo_individualize_resv(bo);
>   		if (ret) {
>   			/* Last resort, if we fail to allocate memory for the
> -			 * fences block for the BO to become idle
> +			 * fences block for the BO to become idle, and then
> +			 * we are free to update the resv pointer.
>   			 */
>   			dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
>   						  30 * HZ);
> +			if (bo->type != ttm_bo_type_sg) {
> +				bo->base.resv = &bo->base._resv;
> +				/* Please see ttm_bo_individualize_resv() */
> +				smp_mb();
> +			}
>   		}
>   
>   		if (bo->bdev->funcs->release_notify)
> @@ -422,9 +380,8 @@ static void ttm_bo_release(struct kref *kref)
>   	    !dma_resv_trylock(bo->base.resv)) {
>   		/* The BO is not idle, resurrect it for delayed destroy */
>   		ttm_bo_flush_all_fences(bo);
> -		bo->deleted = true;
> -
>   		spin_lock(&bo->bdev->lru_lock);
> +		bo->deleted = true;
>   
>   		/*
>   		 * Make pinned bos immediately available to
> @@ -439,8 +396,10 @@ static void ttm_bo_release(struct kref *kref)
>   			ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>   		}
>   
> +		spin_lock(&bo->bdev->ddestroy_lock);
>   		kref_init(&bo->kref);
>   		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> +		spin_unlock(&bo->bdev->ddestroy_lock);
>   		spin_unlock(&bo->bdev->lru_lock);
>   
>   		schedule_delayed_work(&bdev->wq,
> @@ -450,9 +409,11 @@ static void ttm_bo_release(struct kref *kref)
>   
>   	spin_lock(&bo->bdev->lru_lock);
>   	ttm_bo_del_from_lru(bo);
> -	list_del(&bo->ddestroy);
>   	spin_unlock(&bo->bdev->lru_lock);
>   
> +	pr_info("Deleting.\n");
> +	WARN_ON_ONCE(!list_empty_careful(&bo->ddestroy));
> +
>   	ttm_bo_cleanup_memtype_use(bo);
>   	dma_resv_unlock(bo->base.resv);
>   
> @@ -630,25 +591,26 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>   		list_for_each_entry(bo, &man->lru[i], lru) {
>   			bool busy;
>   
> +			if (!ttm_bo_get_unless_zero(bo))
> +				continue;
> +
>   			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>   							    &busy)) {
>   				if (busy && !busy_bo && ticket !=
>   				    dma_resv_locking_ctx(bo->base.resv))
>   					busy_bo = bo;
> +				ttm_bo_put(bo);
>   				continue;
>   			}
>   
>   			if (place && !bdev->funcs->eviction_valuable(bo,
>   								      place)) {
> +				ttm_bo_put(bo);
>   				if (locked)
>   					dma_resv_unlock(bo->base.resv);
>   				continue;
>   			}
> -			if (!ttm_bo_get_unless_zero(bo)) {
> -				if (locked)
> -					dma_resv_unlock(bo->base.resv);
> -				continue;
> -			}
> +
>   			break;
>   		}
>   
> @@ -670,8 +632,11 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>   	}
>   
>   	if (bo->deleted) {
> +		spin_unlock(&bdev->lru_lock);
>   		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
> -					  ctx->no_wait_gpu, locked);
> +					  ctx->no_wait_gpu);
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
>   		ttm_bo_put(bo);
>   		return ret;
>   	}
> @@ -1168,17 +1133,19 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	bool locked;
>   	int ret;
>   
> -	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
> +	if (!ttm_bo_get_unless_zero(bo))
>   		return -EBUSY;
>   
> -	if (!ttm_bo_get_unless_zero(bo)) {
> -		if (locked)
> -			dma_resv_unlock(bo->base.resv);
> +	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) {
> +		ttm_bo_put(bo);
>   		return -EBUSY;
>   	}
>   
>   	if (bo->deleted) {
> -		ttm_bo_cleanup_refs(bo, false, false, locked);
> +		spin_unlock(&bo->bdev->lru_lock);
> +		ttm_bo_cleanup_refs(bo, false, false);
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
>   		ttm_bo_put(bo);
>   		return 0;
>   	}
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 9b787b3caeb5..b941989885ed 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -228,6 +228,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>   	bdev->vma_manager = vma_manager;
>   	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>   	spin_lock_init(&bdev->lru_lock);
> +	spin_lock_init(&bdev->ddestroy_lock);
>   	INIT_LIST_HEAD(&bdev->ddestroy);
>   	bdev->dev_mapping = mapping;
>   	mutex_lock(&ttm_global_mutex);
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 7c8f87bd52d3..fa8c4f6a169e 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -279,6 +279,7 @@ struct ttm_device {
>   	 * Protection for the per manager LRU and ddestroy lists.
>   	 */
>   	spinlock_t lru_lock;
> +	spinlock_t ddestroy_lock;
>   	struct list_head ddestroy;
>   
>   	/*


