[PATCH 2/2] drm/amdgpu: prevent command submission failures under memory pressure

Deucher, Alexander Alexander.Deucher at amd.com
Thu Sep 1 14:33:53 UTC 2016


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Thursday, September 01, 2016 10:16 AM
> To: amd-gfx at lists.freedesktop.org
> Subject: [PATCH 2/2] drm/amdgpu: prevent command submission failures
> under memory pressure
> 
> From: Christian König <christian.koenig at amd.com>
> 
> As a last resort, try to evict BOs from the current working set into other
> memory domains. This effectively prevents command submission failures when
> VM page tables have been swapped out.
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 65
> +++++++++++++++++++++++++++++++++-
>  2 files changed, 65 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 03ba035..ee2eeaa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1225,6 +1225,7 @@ struct amdgpu_cs_parser {
>  	struct fence			*fence;
>  	uint64_t			bytes_moved_threshold;
>  	uint64_t			bytes_moved;
> +	struct amdgpu_bo_list_entry	*evictable;
> 
>  	/* user fence */
>  	struct amdgpu_bo_list_entry	uf_entry;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 23964b8..09adc75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -378,6 +378,60 @@ retry:
>  	}
> 
>  	return r;
> +
> +	return -ENOMEM;
> +}

Unreachable code: the "return -ENOMEM;" directly after "return r;" can never execute.  Other than that looks good.  With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

Alex

> +
> +/* Last resort try to evict something from the current working set */
> +static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
> +				struct amdgpu_bo_list_entry *lobj)
> +{
> +	uint32_t domain = lobj->robj->allowed_domains;
> +	int r;
> +
> +	if (!p->evictable)
> +		return false;
> +
> +	for (;&p->evictable->tv.head != &p->validated;
> +	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
> +
> +		struct amdgpu_bo_list_entry *candidate = p->evictable;
> +		struct amdgpu_bo *bo = candidate->robj;
> +		u64 initial_bytes_moved;
> +		uint32_t other;
> +
> +		/* If we reached our current BO we can forget it */
> +		if (candidate == lobj)
> +			break;
> +
> +		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +
> +		/* Check if this BO is in one of the domains we need space for */
> +		if (!(other & domain))
> +			continue;
> +
> +		/* Check if we can move this BO somewhere else */
> +		other = bo->allowed_domains & ~domain;
> +		if (!other)
> +			continue;
> +
> +		/* Good we can try to move this BO somewhere else */
> +		amdgpu_ttm_placement_from_domain(bo, other);
> +		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
> +		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
> +		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
> +			initial_bytes_moved;
> +
> +		if (unlikely(r))
> +			break;
> +
> +		p->evictable = list_prev_entry(p->evictable, tv.head);
> +		list_move(&candidate->tv.head, &p->validated);
> +
> +		return true;
> +	}
> +
> +	return false;
>  }
> 
>  int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
> @@ -404,9 +458,15 @@ int amdgpu_cs_list_validate(struct
> amdgpu_cs_parser *p,
>  			binding_userptr = true;
>  		}
> 
> -		r = amdgpu_cs_bo_validate(p, bo);
> +		if (p->evictable == lobj)
> +			p->evictable = NULL;
> +
> +		do {
> +			r = amdgpu_cs_bo_validate(p, bo);
> +		} while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj));
>  		if (r)
>  			return r;
> +
>  		if (bo->shadow) {
>  			r = amdgpu_cs_bo_validate(p, bo);
>  			if (r)
> @@ -534,6 +594,9 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
> 
>  	p->bytes_moved_threshold =
> amdgpu_cs_get_threshold_for_moves(p->adev);
>  	p->bytes_moved = 0;
> +	p->evictable = list_last_entry(&p->validated,
> +				       struct amdgpu_bo_list_entry,
> +				       tv.head);
> 
>  	r = amdgpu_cs_list_validate(p, &duplicates);
>  	if (r) {
> --
> 2.5.0
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list