[PATCH 1/2] drm/ttm: fix busy memory to fail other user v7

Zhou, David(ChunMing) David1.Zhou at amd.com
Thu May 9 14:46:19 UTC 2019


I know that before, it will issue warning only when debug option is enabled. Removing that is ok to me.
I only help Prike draft your idea, and Prike is trying this patch on his side. The latest feedback he gave me is first_bo is always null, code doesn't run into busy path, which is very confusing me, and he said  he is debugging  that.

-David


-------- Original Message --------
Subject: Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v7
From: "Koenig, Christian"
To: "Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel at lists.freedesktop.org
CC:

I've foudn one more problem with this.

With lockdep enabled I get a warning because ttm_eu_reserve_buffers()
has called ww_acquire_done() on the ticket (which essentially means we
are done, no more locking with that ticket).

The simplest solution is probably to just remove the call to
ww_acquire_done() from ttm_eu_reserve_buffers().

Christian.

Am 07.05.19 um 13:45 schrieb Chunming Zhou:
> heavy gpu job could occupy memory long time, which lead other user fail to get memory.
>
> basically pick up Christian idea:
>
> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>
> v2: fix some minor check
> v3: address Christian v2 comments.
> v4: fix some missing
> v5: handle first_bo unlock and bo_get/put
> v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
> v7: pass request bo->resv to ttm_bo_evict_first
>
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 111 +++++++++++++++++++++++++++++------
>   1 file changed, 94 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8502b3ed2d88..f5e6328e4a57 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>    * b. Otherwise, trylock it.
>    */
>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> -                     struct ttm_operation_ctx *ctx, bool *locked)
> +                     struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>   {
>        bool ret = false;
>
>        *locked = false;
> +     if (busy)
> +             *busy = false;
>        if (bo->resv == ctx->resv) {
>                reservation_object_assert_held(bo->resv);
>                if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>        } else {
>                *locked = reservation_object_trylock(bo->resv);
>                ret = *locked;
> +             if (!ret && busy)
> +                     *busy = true;
>        }
>
>        return ret;
>   }
>
> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -                            uint32_t mem_type,
> -                            const struct ttm_place *place,
> -                            struct ttm_operation_ctx *ctx)
> +static struct ttm_buffer_object*
> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> +                      struct ttm_mem_type_manager *man,
> +                      const struct ttm_place *place,
> +                      struct ttm_operation_ctx *ctx,
> +                      struct ttm_buffer_object **first_bo,
> +                      bool *locked)
>   {
> -     struct ttm_bo_global *glob = bdev->glob;
> -     struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>        struct ttm_buffer_object *bo = NULL;
> -     bool locked = false;
> -     unsigned i;
> -     int ret;
> +     int i;
>
> -     spin_lock(&glob->lru_lock);
> +     if (first_bo)
> +             *first_bo = NULL;
>        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                list_for_each_entry(bo, &man->lru[i], lru) {
> -                     if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> +                     bool busy = false;
> +
> +                     if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> +                                                         &busy)) {
> +                             if (first_bo && !(*first_bo) && busy) {
> +                                     ttm_bo_get(bo);
> +                                     *first_bo = bo;
> +                             }
>                                continue;
> +                     }
>
>                        if (place && !bdev->driver->eviction_valuable(bo,
>                                                                      place)) {
> -                             if (locked)
> +                             if (*locked)
>                                        reservation_object_unlock(bo->resv);
>                                continue;
>                        }
> +
>                        break;
>                }
>
> @@ -818,9 +831,67 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>                bo = NULL;
>        }
>
> +     return bo;
> +}
> +
> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> +                            uint32_t mem_type,
> +                            const struct ttm_place *place,
> +                            struct ttm_operation_ctx *ctx,
> +                            struct reservation_object *request_resv)
> +{
> +     struct ttm_bo_global *glob = bdev->glob;
> +     struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> +     struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> +     bool locked = false;
> +     int ret;
> +
> +     spin_lock(&glob->lru_lock);
> +     bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
> +                                   &locked);
>        if (!bo) {
> +             struct ttm_operation_ctx busy_ctx;
> +
>                spin_unlock(&glob->lru_lock);
> -             return -EBUSY;
> +             /* check if other user occupy memory too long time */
> +             if (!first_bo || !request_resv || !request_resv->lock.ctx) {
> +                     if (first_bo)
> +                             ttm_bo_put(first_bo);
> +                     return -EBUSY;
> +             }
> +             if (first_bo->resv == request_resv) {
> +                     ttm_bo_put(first_bo);
> +                     return -EBUSY;
> +             }
> +             if (ctx->interruptible)
> +                     ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
> +                                                       request_resv->lock.ctx);
> +             else
> +                     ret = ww_mutex_lock(&first_bo->resv->lock, request_resv->lock.ctx);
> +             if (ret) {
> +                     ttm_bo_put(first_bo);
> +                     return ret;
> +             }
> +             spin_lock(&glob->lru_lock);
> +             /* previous busy resv lock is held by above, idle now,
> +              * so let them evictable.
> +              */
> +             busy_ctx.interruptible = ctx->interruptible;
> +             busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
> +             busy_ctx.resv          = first_bo->resv;
> +             busy_ctx.flags         = TTM_OPT_FLAG_ALLOW_RES_EVICT;
> +
> +             bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
> +                                           &locked);
> +             if (bo && (bo->resv == first_bo->resv))
> +                     locked = true;
> +             else if (bo)
> +                     ww_mutex_unlock(&first_bo->resv->lock);
> +             if (!bo) {
> +                     spin_unlock(&glob->lru_lock);
> +                     ttm_bo_put(first_bo);
> +                     return -EBUSY;
> +             }
>        }
>
>        kref_get(&bo->list_kref);
> @@ -829,11 +900,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>                ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>                                          ctx->no_wait_gpu, locked);
>                kref_put(&bo->list_kref, ttm_bo_release_list);
> +             if (first_bo)
> +                     ttm_bo_put(first_bo);
>                return ret;
>        }
>
>        ttm_bo_del_from_lru(bo);
>        spin_unlock(&glob->lru_lock);
> +     if (first_bo)
> +             ttm_bo_put(first_bo);
>
>        ret = ttm_bo_evict(bo, ctx);
>        if (locked) {
> @@ -907,7 +982,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>                        return ret;
>                if (mem->mm_node)
>                        break;
> -             ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> +             ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>                if (unlikely(ret != 0))
>                        return ret;
>        } while (1);
> @@ -1413,7 +1488,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                while (!list_empty(&man->lru[i])) {
>                        spin_unlock(&glob->lru_lock);
> -                     ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> +                     ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
> +                                               NULL);
>                        if (ret)
>                                return ret;
>                        spin_lock(&glob->lru_lock);
> @@ -1784,7 +1860,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>        spin_lock(&glob->lru_lock);
>        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -                     if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> +                     if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +                                                        NULL)) {
>                                ret = 0;
>                                break;
>                        }

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20190509/58bc3af1/attachment-0001.html>


More information about the dri-devel mailing list