[PATCH] drm/ttm: fix busy memory to fail other user

Christian König ckoenig.leichtzumerken at gmail.com
Thu Apr 25 06:56:51 UTC 2019


Am 24.04.19 um 13:40 schrieb Chunming Zhou:
> This basically picks up Christian's idea:
>
> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).

Please split that step into a separate patch.

> 2. If we then run into this EBUSY condition in TTM, check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
> 3. If we have a ticket, we grab a reference to the first BO on the LRU, drop the LRU lock and try to grab the reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket succeeded, we check if the BO is still the first one on the LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in question, we try to evict it as we would evict any other BO.
> 6. If any of the "if"s above fail, we just back off and return -EBUSY.
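
For reference, steps 2-6 boil down to a slow path roughly like the
following. This is only a simplified sketch: LRU locking and reference
counting are omitted, first_bo_on_lru() is a made-up helper, and
ttm_bo_evict() stands in for the existing eviction path.

static int ttm_evict_busy_first(struct ttm_mem_type_manager *man,
				struct ttm_operation_ctx *ctx)
{
	struct ttm_buffer_object *bo;
	int ret;

	/* Step 2: only wait for a busy BO if the caller has a ticket. */
	if (!ctx->ticket)
		return -EBUSY;

	/* Step 3: take the first BO on the LRU and block on its lock. */
	bo = first_bo_on_lru(man);
	ret = ww_mutex_lock(&bo->resv->lock, ctx->ticket);
	if (ret)
		return ret;	/* -EDEADLK: back off, step 6 */

	/* Step 4: check that the BO is still the first one on the LRU. */
	if (bo != first_bo_on_lru(man)) {
		ww_mutex_unlock(&bo->resv->lock);
		return -EBUSY;	/* step 6 */
	}

	/* Step 5: evict it as we would evict any other BO. */
	return ttm_bo_evict(bo, ctx);
}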
>
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 14 ++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  2 +-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++++--
>   drivers/gpu/drm/ttm/ttm_bo.c                  | 58 +++++++++++++++++--
>   include/drm/ttm/ttm_bo_api.h                  |  1 +
>   7 files changed, 86 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index affde72b44db..e08ff2244070 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -791,6 +791,7 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
>    * @domain: domain to be pinned to
>    * @min_offset: the start of requested address range
>    * @max_offset: the end of requested address range
> + * @ticket: ww_acquire_ctx ticket, if the BO being pinned has one assigned
>    *
>    * Pins the buffer object according to requested domain and address range. If
>    * the memory is unbound gart memory, binds the pages into gart table. Adjusts
> @@ -808,10 +809,17 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
>    * 0 for success or a negative error code on failure.
>    */
>   int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> -			     u64 min_offset, u64 max_offset)
> +			     u64 min_offset, u64 max_offset,
> +			     struct ww_acquire_ctx *ticket)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -	struct ttm_operation_ctx ctx = { false, false };
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = false,
> +		.no_wait_gpu = false,
> +		.resv = bo->tbo.resv,
> +		.ticket = ticket,

We can actually get the ticket from the BO using bo->resv->lock.ctx. 
That should make the whole patch less work.
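
E.g. something like this in amdgpu_bo_pin_restricted(), without any new
parameter (just a sketch; the ctx pointer is only meaningful while the
BO is actually reserved):

	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = false,
		.resv = bo->tbo.resv,
		/* the ww_mutex remembers the acquire ctx it was locked with */
		.ticket = bo->tbo.resv->lock.ctx,
		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
	};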

> +		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
> +	};
>   	int r, i;
>   
>   	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> @@ -907,7 +915,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>    */
>   int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
>   {
> -	return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
> +	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, NULL);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c430e8259038..41e7fff7f3f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -247,7 +247,8 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
>   void amdgpu_bo_unref(struct amdgpu_bo **bo);
>   int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
>   int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> -			     u64 min_offset, u64 max_offset);
> +			     u64 min_offset, u64 max_offset,
> +			     struct ww_acquire_ctx *ticket);
>   int amdgpu_bo_unpin(struct amdgpu_bo *bo);
>   int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
>   int amdgpu_bo_init(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 22bd21efe6b1..dd31ce1a09e3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1150,7 +1150,7 @@ static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
>   	r = amdgpu_bo_pin_restricted(bo,
>   			AMDGPU_GEM_DOMAIN_VRAM,
>   			offset,
> -			offset + size);
> +			offset + size, NULL);
>   	if (r)
>   		goto error_pin;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index afccca5b1f5f..e63af0debc7a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1661,7 +1661,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
>   			AMDGPU_GEM_DOMAIN_VRAM,
>   			adev->fw_vram_usage.start_offset,
>   			(adev->fw_vram_usage.start_offset +
> -			adev->fw_vram_usage.size));
> +			adev->fw_vram_usage.size), NULL);
>   		if (r)
>   			goto error_pin;
>   		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index a5cacf846e1b..bdbbc3891585 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -4101,6 +4101,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>   	struct amdgpu_device *adev;
>   	struct amdgpu_bo *rbo;
>   	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
> +	struct list_head list, duplicates;
> +	struct ttm_validate_buffer tv;
> +	struct ww_acquire_ctx ticket;
>   	uint64_t tiling_flags;
>   	uint32_t domain;
>   	int r;
> @@ -4119,32 +4122,41 @@
>   	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
> -	r = amdgpu_bo_reserve(rbo, false);
> -	if (unlikely(r != 0))
> +	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
> +
> +	tv.bo = &rbo->tbo;
> +	tv.num_shared = 1;
> +	list_add(&tv.head, &list);
> +
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
> +	if (r) {
> +		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
>   		return r;
> +	}
>   
>   	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>   		domain = amdgpu_display_supported_domains(adev);
>   	else
>   		domain = AMDGPU_GEM_DOMAIN_VRAM;
>   
> -	r = amdgpu_bo_pin(rbo, domain);
> +	r = amdgpu_bo_pin_restricted(rbo, domain, 0, 0, &ticket);
>   	if (unlikely(r != 0)) {
>   		if (r != -ERESTARTSYS)
>   			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
> -		amdgpu_bo_unreserve(rbo);
> +		ttm_eu_backoff_reservation(&ticket, &list);
>   		return r;
>   	}
>   
>   	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
>   	if (unlikely(r != 0)) {
>   		amdgpu_bo_unpin(rbo);
> -		amdgpu_bo_unreserve(rbo);
> +		ttm_eu_backoff_reservation(&ticket, &list);
>   		DRM_ERROR("%p bind failed\n", rbo);
>   		return r;
>   	}
>   
>   	amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
>   
> -	amdgpu_bo_unreserve(rbo);
> +	ttm_eu_backoff_reservation(&ticket, &list);
>   
>   	afb->address = amdgpu_bo_gpu_offset(rbo);
>   
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8502b3ed2d88..112fea8f21f9 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,12 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>    * b. Otherwise, trylock it.
>    */
>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> -			struct ttm_operation_ctx *ctx, bool *locked)
> +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>   {
>   	bool ret = false;
>   
>   	*locked = false;
> +	*busy = false;
>   	if (bo->resv == ctx->resv) {
>   		reservation_object_assert_held(bo->resv);
>   		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,6 +780,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>   	} else {
>   		*locked = reservation_object_trylock(bo->resv);
>   		ret = *locked;
> +		if (!ret)
> +			*busy = true;
>   	}
>   
>   	return ret;
> @@ -791,16 +794,20 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   {
>   	struct ttm_bo_global *glob = bdev->glob;
>   	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> -	struct ttm_buffer_object *bo = NULL;
> -	bool locked = false;
> +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> +	bool locked = false, list_busy = false;
>   	unsigned i;
>   	int ret;
>   
>   	spin_lock(&glob->lru_lock);
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		list_for_each_entry(bo, &man->lru[i], lru) {
> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> +			bool busy = false;
> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +							    &busy)) {
> +				list_busy |= busy;
>   				continue;
> +			}
>   
>   			if (place && !bdev->driver->eviction_valuable(bo,
>   								      place)) {
> @@ -819,8 +826,44 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   	}
>   
>   	if (!bo) {
> +		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +			if (list_empty(&man->lru[i]))
> +				continue;
> +			bo = list_first_entry(&man->lru[i],
> +					      struct ttm_buffer_object,
> +					      lru);
> +
> +			break;
> +		}

Maybe keep the first busy BO from the list walk above instead of the 
list_busy flag.
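
I.e. roughly like this in the first loop (sketch only; a reference to
the busy BO would still need to be taken before the LRU lock is
dropped):

	struct ttm_buffer_object *first_busy = NULL;

	list_for_each_entry(bo, &man->lru[i], lru) {
		bool busy;

		if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
						    &busy)) {
			/* remember the first busy BO instead of a flag */
			if (busy && !first_busy)
				first_busy = bo;
			continue;
		}
		...
	}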

>   		spin_unlock(&glob->lru_lock);
> -		return -EBUSY;
> +		if (!list_busy || !ctx->ticket)
> +			return -EBUSY;
> +		if (ctx->interruptible) {
> +			ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
> +							       ctx->ticket);
> +		} else {
> +			ww_mutex_lock_slow(&bo->resv->lock, ctx->ticket);

You MUST use the ww_mutex_lock and ww_mutex_lock_interruptible functions
here and NOT the slow path versions.

AND then check the return code for a deadlock.

Christian.
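
In other words something like this (sketch of the suggested change):

	if (ctx->interruptible)
		ret = ww_mutex_lock_interruptible(&bo->resv->lock,
						  ctx->ticket);
	else
		ret = ww_mutex_lock(&bo->resv->lock, ctx->ticket);

	/*
	 * -EDEADLK means we are the younger transaction and have to
	 * back off so the other side can make progress; the caller
	 * then drops all its locks and restarts.
	 */
	if (ret)
		return ret;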

> +			ret = 0;
> +		}
> +		if (ret)
> +			return ret;
> +		spin_lock(&glob->lru_lock);
> +		for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +			if (list_empty(&man->lru[i]))
> +				continue;
> +			first_bo = list_first_entry(&man->lru[i],
> +						    struct ttm_buffer_object,
> +						    lru);
> +
> +			break;
> +		}
> +		spin_unlock(&glob->lru_lock);
> +		/* verify that the BO has not been moved in the meantime */
> +		if (first_bo != bo) {
> +			ww_mutex_unlock(&bo->resv->lock);
> +			return -EBUSY;
> +		}
> +		/* ok, we locked the first busy BO, evict it below */
>   	}
>   
>   	kref_get(&bo->list_kref);
> @@ -1784,7 +1827,10 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>   	spin_lock(&glob->lru_lock);
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> +			bool busy = false;
> +
> +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +							   &busy)) {
>   				ret = 0;
>   				break;
>   			}
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 49d9cdfc58f2..5bb879f117e3 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -276,6 +276,7 @@ struct ttm_operation_ctx {
>   	bool interruptible;
>   	bool no_wait_gpu;
>   	struct reservation_object *resv;
> +	struct ww_acquire_ctx *ticket;
>   	uint64_t bytes_moved;
>   	uint32_t flags;
>   };


