[PATCH 2/2] drm/ttm: Optimize delayed buffer destruction

Dave Airlie airlied at gmail.com
Mon Oct 18 20:57:32 PDT 2010


On Thu, Oct 14, 2010 at 3:18 AM, Thomas Hellstrom <thellstrom at vmware.com> wrote:
> This commit replaces the ttm_bo_cleanup_ref function with two new functions.
> One for the case where the bo is not yet on the delayed destroy list, and
> one for the case where the bo was on the delayed destroy list, at least at
> the time of call. This makes it possible to optimize the two cases somewhat.

I tried booting this today, but on radeon, starting X hit a recursive
spinlock acquisition (deadlock) on the LRU lock.

(annotated below)

> + *
> + * @interruptible         Any sleeps should occur interruptibly.
> + * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
> + * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
> + */
> +
> +static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> +                              bool interruptible,
> +                              bool no_wait_reserve,
> +                              bool no_wait_gpu)
> +{
> +       struct ttm_bo_global *glob = bo->glob;
> +       int put_count;
> +       int ret = 0;
> +
> +retry:
> +       spin_lock(&bo->lock);
> +       ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> +       spin_unlock(&bo->lock);
> +
> +       if (unlikely(ret != 0))
> +               return ret;
> +
>        spin_lock(&glob->lru_lock);

^^ take LRU lock

> -       if (list_empty(&bo->ddestroy)) {
> -               void *sync_obj = bo->sync_obj;
> -               void *sync_obj_arg = bo->sync_obj_arg;
> +       ret = ttm_bo_reserve_locked(bo, interruptible,
> +                                   no_wait_reserve, false, 0);
>
> -               kref_get(&bo->list_kref);
> -               list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> +       if (unlikely(ret != 0) || list_empty(&bo->ddestroy)) {
>                spin_unlock(&glob->lru_lock);

^^ drop in error path.

> -               spin_unlock(&bo->lock);
> +               return ret;
> +       }
>
> -               if (sync_obj)
> -                       driver->sync_obj_flush(sync_obj, sync_obj_arg);
> -               schedule_delayed_work(&bdev->wq,
> -                                     ((HZ / 100) < 1) ? 1 : HZ / 100);
> -               ret = 0;
> +       /**
> +        * We can re-check for sync object without taking
> +        * the bo::lock since setting the sync object requires
> +        * also bo::reserved. A busy object at this point may
> +        * be caused by another thread recently starting an accelerated
> +        * eviction.
> +        */
>
> -       } else {
> +       if (unlikely(bo->sync_obj)) {
> +               atomic_set(&bo->reserved, 0);
> +               wake_up_all(&bo->event_queue);
>                spin_unlock(&glob->lru_lock);

^^ drop for retry path.

> -               spin_unlock(&bo->lock);
> -               ret = -EBUSY;
> +               goto retry;
>        }
>
> -       return ret;
> +       put_count = ttm_bo_del_from_lru(bo);
> +       list_del_init(&bo->ddestroy);
> +       ++put_count;
> +
> +       ttm_bo_cleanup_memtype_use(bo);

^^ here we enter ttm_bo_cleanup_memtype_use, which calls ttm_bo_mem_put,
which in turn tries to take the LRU lock inside ttm_bo_man_put_node —
but the LRU lock taken above is still held on this path, hence the
recursive-lock deadlock.

Dave.


More information about the dri-devel mailing list