[PATCH] drm/ttm: fix busy memory to fail other user v5
zhoucm1
zhoucm1 at amd.com
Tue May 7 09:38:21 UTC 2019
On 2019-05-07 17:03, Christian König wrote:
>
> Ping!
In fact, I have already addressed your comments; I am just waiting for
Prike's test result. Anyway, I will send v6 first.
-David
>
> Marek is going to need this for his GDS patches as well.
>
> Christian.
>
> On 30.04.19 at 11:10, Christian König wrote:
>> On 30.04.19 at 09:01, Chunming Zhou wrote:
>>> A heavy GPU job can occupy memory for a long time, which leads other
>>> users to fail to get memory.
>>>
>>> This basically picks up Christian's idea (see the sketch after the
>>> list):
>>>
>>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>>> 2. If we then run into this EBUSY condition in TTM check if the BO we
>>> need memory for (or rather the ww_mutex of its reservation object)
>>> has a ticket assigned.
>>> 3. If we have a ticket we grab a reference to the first BO on the
>>> LRU, drop the LRU lock and try to grab the reservation lock with the
>>> ticket.
>>> 4. If getting the reservation lock with the ticket succeeded we check
>>> if the BO is still the first one on the LRU in question (the BO could
>>> have moved).
>>> 5. If the BO is still the first one on the LRU in question we try to
>>> evict it as we would evict any other BO.
>>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>>
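>>> In pseudo-C, the core of steps 2-6 looks roughly like this
>>> (illustrative only; the real implementation is in the diff below):
>>>
>>>     /* the normal LRU walk failed, but we remembered the first busy BO */
>>>     if (!first_bo || !ctx->resv || !ctx->resv->lock.ctx)
>>>             return -EBUSY;          /* no ticket -> just back off */
>>>     ret = ww_mutex_lock(&first_bo->resv->lock, ctx->resv->lock.ctx);
>>>     if (ret)
>>>             return ret;             /* e.g. -EDEADLK from the ww_mutex */
>>>     /* re-walk the LRU; if first_bo is still evictable, evict it,
>>>      * otherwise back off and return -EBUSY
>>>      */
>>>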
>>> v2: fix some minor checks
>>> v3: address Christian's v2 comments.
>>> v4: fix some missing bits
>>> v5: handle first_bo unlock and bo_get/put
>>>
>>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 +-
>>> .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++--
>>> drivers/gpu/drm/ttm/ttm_bo.c | 81 +++++++++++++++++--
>>> 3 files changed, 99 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index affde72b44db..523773e85284 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -811,7 +811,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>>> u64 min_offset, u64 max_offset)
>>> {
>>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> - struct ttm_operation_ctx ctx = { false, false };
>>> + struct ttm_operation_ctx ctx = {
>>> + .interruptible = false,
>>> + .no_wait_gpu = false,
>>> + .resv = bo->tbo.resv,
>>> + .flags = 0
>>> + };
>>> int r, i;
>>> if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index a5cacf846e1b..cc3677c4a4c2 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -4101,6 +4101,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>> struct amdgpu_device *adev;
>>> struct amdgpu_bo *rbo;
>>> struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>>> + struct list_head list, duplicates;
>>> + struct ttm_validate_buffer tv;
>>> + struct ww_acquire_ctx ticket;
>>> uint64_t tiling_flags;
>>> uint32_t domain;
>>> int r;
>>> @@ -4117,9 +4120,18 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>> obj = new_state->fb->obj[0];
>>> rbo = gem_to_amdgpu_bo(obj);
>>> adev = amdgpu_ttm_adev(rbo->tbo.bdev);
>>> - r = amdgpu_bo_reserve(rbo, false);
>>> - if (unlikely(r != 0))
>>> + INIT_LIST_HEAD(&list);
>>> + INIT_LIST_HEAD(&duplicates);
>>> +
>>> + tv.bo = &rbo->tbo;
>>> + tv.num_shared = 1;
>>> + list_add(&tv.head, &list);
>>> +
>>> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>> + if (r) {
>>> + dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
>>> return r;
>>> + }
>>> if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>> domain = amdgpu_display_supported_domains(adev);
>>> @@ -4130,21 +4142,21 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>> if (unlikely(r != 0)) {
>>> if (r != -ERESTARTSYS)
>>> DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
>>> - amdgpu_bo_unreserve(rbo);
>>> + ttm_eu_backoff_reservation(&ticket, &list);
>>> return r;
>>> }
>>> r = amdgpu_ttm_alloc_gart(&rbo->tbo);
>>> if (unlikely(r != 0)) {
>>> amdgpu_bo_unpin(rbo);
>>> - amdgpu_bo_unreserve(rbo);
>>> + ttm_eu_backoff_reservation(&ticket, &list);
>>> DRM_ERROR("%p bind failed\n", rbo);
>>> return r;
>>> }
>>> amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
>>> - amdgpu_bo_unreserve(rbo);
>>> + ttm_eu_backoff_reservation(&ticket, &list);
>>
>> Well, I can only repeat myself: please put that into a separate patch!
>>
>>> afb->address = amdgpu_bo_gpu_offset(rbo);
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 8502b3ed2d88..2c4963e105d9 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>> * b. Otherwise, trylock it.
>>> */
>>> static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>> - struct ttm_operation_ctx *ctx, bool *locked)
>>> + struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>> {
>>> bool ret = false;
>>> *locked = false;
>>> + if (busy)
>>> + *busy = false;
>>> if (bo->resv == ctx->resv) {
>>> reservation_object_assert_held(bo->resv);
>>> if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> @@ -779,6 +781,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>>> } else {
>>> *locked = reservation_object_trylock(bo->resv);
>>> ret = *locked;
>>> + if (!ret && busy)
>>> + *busy = true;
>>> }
>>> return ret;
>>> @@ -791,7 +795,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> {
>>> struct ttm_bo_global *glob = bdev->glob;
>>> struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>> - struct ttm_buffer_object *bo = NULL;
>>> + struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>> bool locked = false;
>>> unsigned i;
>>> int ret;
>>> @@ -799,8 +803,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> spin_lock(&glob->lru_lock);
>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> list_for_each_entry(bo, &man->lru[i], lru) {
>>> - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>>> + bool busy = false;
>>> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>> + &busy)) {
>>> + if (!first_bo && busy) {
>>> + ttm_bo_get(bo);
>>> + first_bo = bo;
>>> + }
>>> continue;
>>> + }
>>> if (place && !bdev->driver->eviction_valuable(bo,
>>> place)) {
>>> @@ -808,6 +819,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> reservation_object_unlock(bo->resv);
>>> continue;
>>> }
>>> +
>>> break;
>>> }
>>> @@ -820,7 +832,65 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>> if (!bo) {
>>> spin_unlock(&glob->lru_lock);
>>> - return -EBUSY;
>>> + /* check if another user has been occupying memory for too long */
>>> + if (!first_bo || !ctx || !ctx->resv || !ctx->resv->lock.ctx) {
>>> + if (first_bo)
>>> + ttm_bo_put(first_bo);
>>> + return -EBUSY;
>>> + }
>>> + if (ctx->interruptible)
>>> + ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>>> + ctx->resv->lock.ctx);
>>> + else
>>> + ret = ww_mutex_lock(&first_bo->resv->lock, ctx->resv->lock.ctx);
>>> + if (ret) {
>>> + ttm_bo_put(first_bo);
>>> + return ret;
>>> + }
>>> + spin_lock(&glob->lru_lock);
>>> + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> + /* The previously busy resv lock is now held by us (see above),
>>> + * so those BOs are idle; allow them to be evicted.
>>> + */
>>> + struct ttm_operation_ctx busy_ctx = {
>>> + .interruptible = ctx->interruptible,
>>> + .no_wait_gpu = ctx->no_wait_gpu,
>>> + .resv = first_bo->resv,
>>> + .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
>>> + };
>>> + list_for_each_entry(bo, &man->lru[i], lru) {
>>> + if (!ttm_bo_evict_swapout_allowable(bo,
>>> + &busy_ctx,
>>> + &locked,
>>> + NULL))
>>> + continue;
>>> +
>>> + if (place && !bdev->driver->eviction_valuable(bo,
>>> + place)) {
>>> + if (locked)
>>> + reservation_object_unlock(bo->resv);
>>> + continue;
>>> + }
>>> + break;
>>> + }
>>> + /* If the inner loop terminated early, we have our candidate */
>>> + if (&bo->lru != &man->lru[i])
>>> + break;
>>> + bo = NULL;
>>> + }
>>
>> So we are now searching again for any BO which matches the criteria?
>> Good idea.
>>
>> Can we factor those two loops out into a separate function? I mean
>> they are mostly identical now to the one above, aren't they?
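>>
>> Something like this untested sketch is what I mean (the helper name
>> ttm_mem_find_evictable is hypothetical, and the first_bo bookkeeping
>> could stay in the caller):
>>
>>     static struct ttm_buffer_object *
>>     ttm_mem_find_evictable(struct ttm_bo_device *bdev,
>>                            struct ttm_mem_type_manager *man,
>>                            const struct ttm_place *place,
>>                            struct ttm_operation_ctx *ctx,
>>                            bool *locked, bool *busy)
>>     {
>>             struct ttm_buffer_object *bo;
>>             unsigned i;
>>
>>             for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>                     list_for_each_entry(bo, &man->lru[i], lru) {
>>                             /* skip BOs we can't lock (optionally noting
>>                              * a busy one via *busy)
>>                              */
>>                             if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>                                                                 locked,
>>                                                                 busy))
>>                                     continue;
>>
>>                             /* skip BOs the driver doesn't consider
>>                              * worth evicting for this placement
>>                              */
>>                             if (place &&
>>                                 !bdev->driver->eviction_valuable(bo, place)) {
>>                                     if (*locked)
>>                                             reservation_object_unlock(bo->resv);
>>                                     continue;
>>                             }
>>                             return bo;
>>                     }
>>             }
>>             return NULL;
>>     }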
>>
>>> + if (bo && (bo->resv == first_bo->resv))
>>> + locked = true;
>>> + else if (bo)
>>> + ww_mutex_unlock(&first_bo->resv->lock);
>>> + ttm_bo_put(first_bo);
>>> + first_bo = NULL;
>>> + if (!bo) {
>>> + spin_unlock(&glob->lru_lock);
>>> + return -EBUSY;
>>> + }
>>> + } else {
>>> + if (first_bo)
>>> + ttm_bo_put(first_bo);
>>
>> This must come a bit later, since we can't call ttm_bo_put() while
>> holding the spinlock.
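>>
>> I.e. something like this (sketch):
>>
>>     spin_unlock(&glob->lru_lock);
>>     if (first_bo)
>>             ttm_bo_put(first_bo);   /* only after dropping lru_lock */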
>>
>> Regards,
>> Christian.
>>
>>> }
>>> kref_get(&bo->list_kref);
>>> @@ -1784,7 +1854,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>> spin_lock(&glob->lru_lock);
>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>>> - if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>>> + if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>>> + NULL)) {
>>> ret = 0;
>>> break;
>>> }
>>
>