[PATCH 1/3] drm/amdgpu:make ctx_add_fence interruptible(v2)

Christian König ckoenig.leichtzumerken at gmail.com
Tue Sep 19 11:42:28 UTC 2017


Am 19.09.2017 um 08:41 schrieb Monk Liu:
> otherwise a GPU hang makes it impossible to kill the application
> under timedout=0 mode
>
> v2:
> Fix memoryleak job/job->s_fence issue
> unlock mn
> remove the ERROR msg after waiting being interrupted
>
> Change-Id: I6051b5b3ae1188983f49325a2438c84a6c12374a
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 17 +++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 12 +++++++-----
>   3 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cc9a232..6ff2959 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -736,8 +736,8 @@ struct amdgpu_ctx_mgr {
>   struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
>   int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
>   
> -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> -			      struct dma_fence *fence);
> +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> +			      struct dma_fence *fence, uint64_t *seq);
>   struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>   				   struct amdgpu_ring *ring, uint64_t seq);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index b59749d..9bd4834 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1043,6 +1043,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
>   	struct amdgpu_job *job;
>   	unsigned i;
> +	uint64_t seq;
> +
>   	int r;
>   
>   	amdgpu_mn_lock(p->mn);
> @@ -1071,8 +1073,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	job->owner = p->filp;
>   	job->fence_ctx = entity->fence_context;
>   	p->fence = dma_fence_get(&job->base.s_fence->finished);
> -	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
> -	job->uf_sequence = cs->out.handle;
> +	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
> +	if (r) {
> +		/* release job include the sched fence as well */
> +		dma_fence_put(&job->base.s_fence->finished);
> +		dma_fence_put(&job->base.s_fence->scheduled);
> +		amdgpu_job_free(job);
> +		amdgpu_mn_unlock(p->mn);
> +		dma_fence_put(p->fence);
> +		return r;
> +	}
> +
> +	cs->out.handle = seq;
> +	job->uf_sequence = seq;
>   	amdgpu_job_free_resources(job);
>   
>   	trace_amdgpu_cs_ioctl(job);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a11e443..551f114 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
>   	return 0;
>   }
>   
> -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> -			      struct dma_fence *fence)
> +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> +			      struct dma_fence *fence, uint64_t* handler)
>   {
>   	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
>   	uint64_t seq = cring->sequence;
> @@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>   	other = cring->fences[idx];
>   	if (other) {
>   		signed long r;
> -		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> +		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
>   		if (r < 0)
> -			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> +			return -ERESTARTSYS;

Return the original error code here, i.e. "r", instead of a hard-coded -ERESTARTSYS.

With that fixed the patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>

Regards,
Christian.

>   	}
>   
>   	dma_fence_get(fence);
> @@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>   	spin_unlock(&ctx->ring_lock);
>   
>   	dma_fence_put(other);
> +	if (handler)
> +		*handler = seq;
>   
> -	return seq;
> +	return 0;
>   }
>   
>   struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,




More information about the amd-gfx mailing list