[PATCH 02/11] drm/ttm: fix busy memory to fail other user v8

Wed May 15 09:27:25 UTC 2019

Am 15.05.19 um 10:45 schrieb Daniel Vetter:
> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>> From: Chunming Zhou <david1.zhou at amd.com>
>>>
>>> heavy gpu job could occupy memory long time, which lead other user fail to get memory.
>>>
>>> basically pick up Christian idea:
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we check if the BO is still the first one on the LRU in question (the BO could have moved).
>>> 5. If the BO is still the first one on the LRU in question we try to evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>
>>> v2: fix some minor check
>>> v3: address Christian v2 comments.
>>> v4: fix some missing
>>> v5: handle first_bo unlock and bo_get/put
>>> v6: abstract unified iterate function, and handle all possible usecase not only pinned bo.
>>> v7: pass request bo->resv to ttm_bo_evict_first
>>> v8 (chk): minimal coding style fix
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>> Reviewed-by: Christian König <christian.koenig at amd.com>
>> I think this closes a big gap between ttm and the bkl/struct_mutex
>> drivers - it's much easier to guarantee you can evict everything if
>> there's only a single lock :-)
>>
>> Would be absolutely awesome if we could extract this as some kind of
>> building block, like we've done with lots of other ttm concepts already
>> (reservation_obj, fences, ...).
>>
>> Just an aside really.
> Ofc this is meant as a comment on the entire patch series, without all the
> other patches to make sure BO always stay on a relevant LRU there's still
> gaps in the guaranteed forward progress eviction algorithm.

Yeah, the problem surfaced because of patch #4. Previously TTM would 
have just ignored all errors and continued to try different placements 
and only return -ENOMEM when we ran out of a possible placements.

I probably need to either fix patch #4 or reorder the patches.

Thanks for the note,
Christian.

> -Daniel
>
>> -Daniel
>>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
>>>   1 file changed, 96 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 2845fceb2fbd..e634d3a36923 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>>    * b. Otherwise, trylock it.
>>>    */
>>>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>> -			struct ttm_operation_ctx *ctx, bool *locked)
>>> +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>>   {
>>>   	bool ret = false;
>>>   
>>>   	*locked = false;
>>> +	if (busy)
>>> +		*busy = false;
>>>   	if (bo->resv == ctx->resv) {
>>>   		reservation_object_assert_held(bo->resv);
>>>   		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>>   	} else {
>>>   		*locked = reservation_object_trylock(bo->resv);
>>>   		ret = *locked;
>>> +		if (!ret && busy)
>>> +			*busy = true;
>>>   	}
>>>   
>>>   	return ret;
>>>   }
>>>   
>>> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> -			       uint32_t mem_type,
>>> -			       const struct ttm_place *place,
>>> -			       struct ttm_operation_ctx *ctx)
>>> +static struct ttm_buffer_object*
>>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
>>> +			 struct ttm_mem_type_manager *man,
>>> +			 const struct ttm_place *place,
>>> +			 struct ttm_operation_ctx *ctx,
>>> +			 struct ttm_buffer_object **first_bo,
>>> +			 bool *locked)
>>>   {
>>> -	struct ttm_bo_global *glob = bdev->glob;
>>> -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>>   	struct ttm_buffer_object *bo = NULL;
>>> -	bool locked = false;
>>> -	unsigned i;
>>> -	int ret;
>>> +	int i;
>>>   
>>> -	spin_lock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		*first_bo = NULL;
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &man->lru[i], lru) {
>>> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> +			bool busy = false;
>>> +
>>> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>>> +							    &busy)) {
>>> +				if (first_bo && !(*first_bo) && busy) {
>>> +					ttm_bo_get(bo);
>>> +					*first_bo = bo;
>>> +				}
>>>   				continue;
>>> +			}
>>>   
>>>   			if (place && !bdev->driver->eviction_valuable(bo,
>>>   								      place)) {
>>> -				if (locked)
>>> +				if (*locked)
>>>   					reservation_object_unlock(bo->resv);
>>>   				continue;
>>>   			}
>>> +
>>>   			break;
>>>   		}
>>>   
>>> @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		bo = NULL;
>>>   	}
>>>   
>>> +	return bo;
>>> +}
>>> +
>>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> +			       uint32_t mem_type,
>>> +			       const struct ttm_place *place,
>>> +			       struct ttm_operation_ctx *ctx,
>>> +			       struct reservation_object *request_resv)
>>> +{
>>> +	struct ttm_bo_global *glob = bdev->glob;
>>> +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>> +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>> +	bool locked = false;
>>> +	int ret;
>>> +
>>> +	spin_lock(&glob->lru_lock);
>>> +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
>>> +				      &locked);
>>>   	if (!bo) {
>>> +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
>>> +		struct ttm_operation_ctx busy_ctx;
>>> +
>>>   		spin_unlock(&glob->lru_lock);
>>> -		return -EBUSY;
>>> +		/* check if other user occupy memory too long time */
>>> +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
>>> +			if (first_bo)
>>> +				ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (first_bo->resv == request_resv) {
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>> +		if (ctx->interruptible)
>>> +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>>> +							  acquire_ctx);
>>> +		else
>>> +			ret = ww_mutex_lock(&first_bo->resv->lock,
>>> +					    acquire_ctx);
>>> +		if (ret) {
>>> +			ttm_bo_put(first_bo);
>>> +			return ret;
>>> +		}
>>> +		spin_lock(&glob->lru_lock);
>>> +		/* previous busy resv lock is held by above, idle now,
>>> +		 * so let them evictable.
>>> +		 */
>>> +		busy_ctx.interruptible = ctx->interruptible;
>>> +		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
>>> +		busy_ctx.resv	       = first_bo->resv;
>>> +		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
>>> +
>>> +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
>>> +					      &locked);
>>> +		if (bo && (bo->resv == first_bo->resv))
>>> +			locked = true;
>>> +		else if (bo)
>>> +			ww_mutex_unlock(&first_bo->resv->lock);
>>> +		if (!bo) {
>>> +			spin_unlock(&glob->lru_lock);
>>> +			ttm_bo_put(first_bo);
>>> +			return -EBUSY;
>>> +		}
>>>   	}
>>>   
>>>   	kref_get(&bo->list_kref);
>>> @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>>   		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>>>   					  ctx->no_wait_gpu, locked);
>>>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>>> +		if (first_bo)
>>> +			ttm_bo_put(first_bo);
>>>   		return ret;
>>>   	}
>>>   
>>>   	ttm_bo_del_from_lru(bo);
>>>   	spin_unlock(&glob->lru_lock);
>>> +	if (first_bo)
>>> +		ttm_bo_put(first_bo);
>>>   
>>>   	ret = ttm_bo_evict(bo, ctx);
>>>   	if (locked) {
>>> @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>>>   			return ret;
>>>   		if (mem->mm_node)
>>>   			break;
>>> -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>>> +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>>>   		if (unlikely(ret != 0))
>>>   			return ret;
>>>   	} while (1);
>>> @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		while (!list_empty(&man->lru[i])) {
>>>   			spin_unlock(&glob->lru_lock);
>>> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
>>> +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
>>> +						  NULL);
>>>   			if (ret)
>>>   				return ret;
>>>   			spin_lock(&glob->lru_lock);
>>> @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>>   	spin_lock(&glob->lru_lock);
>>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>   		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>>> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>>> +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>> +							   NULL)) {
>>>   				ret = 0;
>>>   				break;
>>>   			}
>>> -- 
>>> 2.17.1
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch