[PATCH 04/10] drm/ttm: change fence_lock to inner lock, v3
Thomas Hellstrom
thellstrom at vmware.com
Mon Nov 19 06:17:45 PST 2012
Hi,
This patch looks mostly good, although I think ttm_bo_cleanup_refs
becomes overly complicated:
Could this do, or am I missing something?
static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
			       bool interruptible,
			       bool no_wait_reserve,
			       bool no_wait_gpu)
{
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_bo_global *glob = bo->glob;
	int put_count;
	int ret = 0;

	/*
	 * First, reserve while making sure we're still on the
	 * ddestroy list.
	 */
retry_reserve:
	spin_lock(&glob->lru_lock);

	if (unlikely(list_empty(&bo->ddestroy))) {
		spin_unlock(&glob->lru_lock);
		return 0;
	}

	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);

	if (unlikely(ret == -EBUSY)) {
		spin_unlock(&glob->lru_lock);
		if (likely(!no_wait_reserve))
			ret = ttm_bo_wait_unreserved(bo, interruptible);
		if (unlikely(ret != 0))
			return ret;

		goto retry_reserve;
	}

	BUG_ON(ret != 0);

	spin_lock(&bdev->fence_lock);
	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
	spin_unlock(&bdev->fence_lock);

	if (unlikely(ret != 0)) {
		atomic_set(&bo->reserved, 0);
		wake_up_all(&bo->event_queue);
		spin_unlock(&glob->lru_lock);
		return ret;
	}

	put_count = ttm_bo_del_from_lru(bo);
	list_del_init(&bo->ddestroy);
	++put_count;

	spin_unlock(&glob->lru_lock);
	ttm_bo_cleanup_memtype_use(bo);

	atomic_set(&bo->reserved, 0);
	wake_up_all(&bo->event_queue);
	ttm_bo_list_ref_sub(bo, put_count, true);

	return 0;
}
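
For reference, the lock nesting that both the above and your patch end up
relying on looks roughly like this. It is only an illustrative sketch of the
ordering, not code taken from the patch; the function name is made up, and it
skips the LRU handling a real unreserve would do:

/* Illustrative only: the lock order assumed once fence_lock is the inner lock. */
static int example_lock_order(struct ttm_buffer_object *bo)
{
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_bo_global *glob = bo->glob;
	int ret;

	spin_lock(&glob->lru_lock);		/* outermost */

	/* bo::reserve is trylocked while holding the lru lock */
	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
	if (unlikely(ret != 0)) {
		spin_unlock(&glob->lru_lock);
		return ret;
	}

	spin_lock(&bdev->fence_lock);		/* innermost */
	/* inspect or wait on bo->sync_obj here */
	spin_unlock(&bdev->fence_lock);

	/* release in the reverse order; a real unreserve would also
	 * put the bo back on the LRU */
	atomic_set(&bo->reserved, 0);
	wake_up_all(&bo->event_queue);
	spin_unlock(&glob->lru_lock);

	return 0;
}

The point being that fence_lock is now strictly inside both the lru lock and
the reservation, so it can be taken and dropped freely without the old
trylock games against lock inversion.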
On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
> I changed the hierarchy to make fence_lock the innermost lock
> instead of an outer lock. This will simplify things slightly, and
> hopefully makes it easier to make fence_lock global at some point,
> should it be needed.
>
> To make things clearer, I changed the order around in ttm_bo_cleanup_refs
> and ttm_bo_cleanup_refs_or_queue.
>
> A reservation is taken first, then the fence lock is taken and a wait is attempted.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>
> v2:
> - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
> v3:
> - change removal of fence_lock to making it an inner lock instead
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 95 ++++++++++++++++------------------
> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 4 +-
> 2 files changed, 48 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a3383a7..70285ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> struct ttm_bo_global *glob = bo->glob;
> - struct ttm_bo_driver *driver;
> + struct ttm_bo_driver *driver = bdev->driver;
> void *sync_obj = NULL;
> int put_count;
> int ret;
>
> - spin_lock(&bdev->fence_lock);
> - (void) ttm_bo_wait(bo, false, false, true);
> - if (!bo->sync_obj) {
> -
> - spin_lock(&glob->lru_lock);
> -
> - /**
> - * Lock inversion between bo:reserve and bdev::fence_lock here,
> - * but that's OK, since we're only trylocking.
> - */
> -
> - ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> + spin_lock(&glob->lru_lock);
> + ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> + if (!ret) {
> + spin_lock(&bdev->fence_lock);
> + ret = ttm_bo_wait(bo, false, false, true);
>
> - if (unlikely(ret == -EBUSY))
> + if (unlikely(ret == -EBUSY)) {
> + sync_obj = driver->sync_obj_ref(bo->sync_obj);
> + spin_unlock(&bdev->fence_lock);
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> goto queue;
> -
> + }
> spin_unlock(&bdev->fence_lock);
> +
> put_count = ttm_bo_del_from_lru(bo);
>
> atomic_set(&bo->reserved, 0);
> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> ttm_bo_list_ref_sub(bo, put_count, true);
>
> return;
> - } else {
> - spin_lock(&glob->lru_lock);
> }
> queue:
> - driver = bdev->driver;
> - if (bo->sync_obj)
> - sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -
> kref_get(&bo->list_kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> spin_unlock(&glob->lru_lock);
> - spin_unlock(&bdev->fence_lock);
>
> if (sync_obj) {
> driver->sync_obj_flush(sync_obj);
> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> bool no_wait_gpu)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> + struct ttm_bo_driver *driver = bdev->driver;
> struct ttm_bo_global *glob = bo->glob;
> int put_count;
> int ret = 0;
> + void *sync_obj;
>
> retry:
> - spin_lock(&bdev->fence_lock);
> - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> - spin_unlock(&bdev->fence_lock);
> + spin_lock(&glob->lru_lock);
>
> - if (unlikely(ret != 0))
> - return ret;
> + ret = ttm_bo_reserve_locked(bo, interruptible,
> + no_wait_reserve, false, 0);
>
> -retry_reserve:
> - spin_lock(&glob->lru_lock);
> + if (unlikely(ret)) {
> + spin_unlock(&glob->lru_lock);
> + return ret;
> + }
>
> if (unlikely(list_empty(&bo->ddestroy))) {
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> spin_unlock(&glob->lru_lock);
> return 0;
> }
>
> - ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> -
> - if (unlikely(ret == -EBUSY)) {
> - spin_unlock(&glob->lru_lock);
> - if (likely(!no_wait_reserve))
> - ret = ttm_bo_wait_unreserved(bo, interruptible);
> - if (unlikely(ret != 0))
> + spin_lock(&bdev->fence_lock);
> + ret = ttm_bo_wait(bo, false, false, true);
> + if (ret) {
> + if (no_wait_gpu) {
> + spin_unlock(&bdev->fence_lock);
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> + spin_unlock(&glob->lru_lock);
> return ret;
> + }
>
> - goto retry_reserve;
> - }
> -
> - BUG_ON(ret != 0);
> -
> - /**
> - * We can re-check for sync object without taking
> - * the bo::lock since setting the sync object requires
> - * also bo::reserved. A busy object at this point may
> - * be caused by another thread recently starting an accelerated
> - * eviction.
> - */
> + /**
> + * Take a reference to the fence and unreserve. If the wait
> + * was successful and no new sync_obj was attached,
> + * ttm_bo_wait in the retry will return ret = 0 and end the loop.
> + */
>
> - if (unlikely(bo->sync_obj)) {
> + sync_obj = driver->sync_obj_ref(&bo->sync_obj);
> + spin_unlock(&bdev->fence_lock);
> atomic_set(&bo->reserved, 0);
> wake_up_all(&bo->event_queue);
> spin_unlock(&glob->lru_lock);
> +
> + ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
> + driver->sync_obj_unref(&sync_obj);
> + if (ret)
> + return ret;
> goto retry;
> }
> + spin_unlock(&bdev->fence_lock);
>
> put_count = ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 1986d00..cd9e452 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
> driver = bdev->driver;
> glob = bo->glob;
>
> - spin_lock(&bdev->fence_lock);
> spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->fence_lock);
>
> list_for_each_entry(entry, list, head) {
> bo = entry->bo;
> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
> ttm_bo_unreserve_locked(bo);
> entry->reserved = false;
> }
> - spin_unlock(&glob->lru_lock);
> spin_unlock(&bdev->fence_lock);
> + spin_unlock(&glob->lru_lock);
>
> list_for_each_entry(entry, list, head) {
> if (entry->old_sync_obj)
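
As a side note, the busy path in ttm_bo_cleanup_refs boils down to: take a
sync_obj reference under fence_lock, drop the reservation and both locks,
wait on the fence with no spinlock held, then retry. Reduced to just that
step it would look something like the sketch below. The helper name is made
up, and waiting on the referenced sync_obj (rather than on bo->sync_obj) is
my assumption, not something taken verbatim from the patch.

/* Called with lru_lock, the reservation and fence_lock held. */
static int example_wait_busy_bo(struct ttm_buffer_object *bo,
				bool interruptible)
{
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_bo_driver *driver = bdev->driver;
	struct ttm_bo_global *glob = bo->glob;
	void *sync_obj;
	int ret;

	/* grab a reference so the fence can't go away under us */
	sync_obj = driver->sync_obj_ref(bo->sync_obj);
	spin_unlock(&bdev->fence_lock);

	/* give up the reservation and the lru lock before sleeping */
	atomic_set(&bo->reserved, 0);
	wake_up_all(&bo->event_queue);
	spin_unlock(&glob->lru_lock);

	/* wait without any spinlock held, then drop our reference */
	ret = driver->sync_obj_wait(sync_obj, false, interruptible);
	driver->sync_obj_unref(&sync_obj);

	return ret;	/* caller retries from the top on success */
}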