[PATCH 05/18] drm/amdgpu: make ctx_add_fence interruptible
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Sep 18 09:10:11 UTC 2017
On 18.09.2017 at 08:11, Monk Liu wrote:
> otherwise a GPU hang makes the application impossible to kill
>
> Change-Id: I6051b5b3ae1188983f49325a2438c84a6c12374a
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 14 +++++++++-----
> 3 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cc9a232..6ff2959 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -736,8 +736,8 @@ struct amdgpu_ctx_mgr {
> struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
> int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
>
> -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> - struct dma_fence *fence);
> +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> + struct dma_fence *fence, uint64_t *seq);
> struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> struct amdgpu_ring *ring, uint64_t seq);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index b59749d..4ac7a92 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1043,6 +1043,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
> struct amdgpu_job *job;
> unsigned i;
> + uint64_t seq;
> +
> int r;
>
> amdgpu_mn_lock(p->mn);
> @@ -1071,8 +1073,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> job->owner = p->filp;
> job->fence_ctx = entity->fence_context;
> p->fence = dma_fence_get(&job->base.s_fence->finished);
> - cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
> - job->uf_sequence = cs->out.handle;
> + r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
> + if (r) {
> + dma_fence_put(p->fence);
> + return r;
This will leak the job, and you also need to call amdgpu_mn_unlock()
before returning here.
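Roughly something like the following, completely untested and the
cleanup of the scheduler fence might need extra care, but you get the
idea:

        r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
        if (r) {
                /* drop the fence reference taken above, free the job we
                 * allocated and release the mn lock taken at the start of
                 * the function before bailing out */
                dma_fence_put(p->fence);
                amdgpu_job_free(job);
                amdgpu_mn_unlock(p->mn);
                return r;
        }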
> + }
> +
> + cs->out.handle = seq;
> + job->uf_sequence = seq;
> amdgpu_job_free_resources(job);
>
> trace_amdgpu_cs_ioctl(job);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a11e443..97f8be4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
> return 0;
> }
>
> -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> - struct dma_fence *fence)
> +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> + struct dma_fence *fence, uint64_t* handler)
> {
> struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
> uint64_t seq = cring->sequence;
> @@ -258,9 +258,11 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> other = cring->fences[idx];
> if (other) {
> signed long r;
> - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> - if (r < 0)
> + r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
> + if (r < 0) {
> DRM_ERROR("Error (%ld) waiting for fence!\n", r);
Drop the extra error message here. Receiving a signal is not something
that should trigger an extra message in the logs.
> + return -ERESTARTSYS;
And return the original error code here.
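In other words, just something like this (sketch):

                r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
                if (r < 0)
                        /* e.g. -ERESTARTSYS when interrupted by a signal */
                        return r;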
Apart from that, this looks good to me,
Christian.
> + }
> }
>
> dma_fence_get(fence);
> @@ -271,8 +273,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> spin_unlock(&ctx->ring_lock);
>
> dma_fence_put(other);
> + if (handler)
> + *handler = seq;
>
> - return seq;
> + return 0;
> }
>
> struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,