[PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
Christian König
christian.koenig at amd.com
Wed Oct 11 07:25:23 UTC 2017
Yes, the mutex is mandatory.
As I explained before, it doesn't matter what userspace is doing; the
kernel IOCTL must always be thread safe.
Otherwise userspace could force the kernel to run into a BUG_ON() or worse.
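For illustration, here is a minimal userspace sketch (hypothetical, not
from the patch) that bypasses libdrm's pthread_mutex entirely by racing
two raw CS ioctls against the same context; the kernel must survive this
no matter how well-behaved libdrm is:

/* Hypothetical demo: two threads race DRM_IOCTL_AMDGPU_CS on one context.
 * Build against the amdgpu UAPI headers (e.g. -I/usr/include/libdrm) and
 * link with -lpthread.  The submission itself is bogus; the point is only
 * that nothing stops userspace from entering the ioctl concurrently. */
#include <fcntl.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <amdgpu_drm.h>

static int fd;	/* one render node shared by both threads */

static void *submit(void *arg)
{
	/* No userspace lock here, unlike amdgpu_cs_submit() in libdrm. */
	ioctl(fd, DRM_IOCTL_AMDGPU_CS, arg);
	return NULL;
}

int main(void)
{
	union drm_amdgpu_cs cs = { .in.ctx_id = 1 };	/* same ctx in both threads */
	pthread_t a, b;

	fd = open("/dev/dri/renderD128", O_RDWR);
	pthread_create(&a, NULL, submit, &cs);
	pthread_create(&b, NULL, submit, &cs);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}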
In addition to that, we already use a CS interface upstream which
doesn't have a pthread_mutex any more.
Regards,
Christian.
On 11.10.2017 at 05:28, Liu, Monk wrote:
> Hi Andrey & Christian
>
> Do we really need the mutex lock here?
> Libdrm_amdgpu already has a pthread_mutex to protect against multi-threaded races; the kernel side should be safe with that
>
> BR Monk
>
> -----Original Message-----
> From: Andrey Grodzovsky [mailto:andrey.grodzovsky at amd.com]
> Sent: Wednesday, October 11, 2017 4:50 AM
> To: Koenig, Christian <Christian.Koenig at amd.com>; Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>
> Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
>
> Helps avoid a deadlock during GPU reset.
> Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.
>
> v2:
> Put the waiting logic in a separate function in amdgpu_ctx.c
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
> 3 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index da48f97..235eca5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -741,6 +741,7 @@ struct amdgpu_ctx {
> bool preamble_presented;
> enum amd_sched_priority init_priority;
> enum amd_sched_priority override_priority;
> + struct mutex lock;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
>
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
> +
> void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
> void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>
> +
> /*
> * file private structure
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1a54e53..c36297c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> goto free_chunk;
> }
>
> + mutex_lock(&p->ctx->lock);
> +
> /* get chunks */
> chunk_array_user = u64_to_user_ptr(cs->in.chunks);
> if (copy_from_user(chunk_array, chunk_array_user,
> @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>
> dma_fence_put(parser->fence);
>
> - if (parser->ctx)
> + if (parser->ctx) {
> + mutex_unlock(&parser->ctx->lock);
> amdgpu_ctx_put(parser->ctx);
> + }
>
> if (parser->bo_list)
> amdgpu_bo_list_put(parser->bo_list);
> @@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
> return -EINVAL;
>
> - return 0;
> + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
> }
>
> static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a78b03f6..4309820 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> if (!ctx->fences)
> return -ENOMEM;
>
> + mutex_init(&ctx->lock);
> +
> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> ctx->rings[i].sequence = 1;
> ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
> @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
> &ctx->rings[i].entity);
>
> amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> +
> + mutex_destroy(&ctx->lock);
> }
>
> static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>
> idx = seq & (amdgpu_sched_jobs - 1);
> other = cring->fences[idx];
> - if (other) {
> - signed long r;
> - r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
> - if (r < 0)
> - return r;
> - }
> + if (other)
> + BUG_ON(!dma_fence_is_signaled(other));
>
> dma_fence_get(fence);
>
> @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> }
> }
>
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
> +{
> + struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
> + unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
> + struct dma_fence *other = cring->fences[idx];
> +
> + if (other) {
> + signed long r;
> + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> + if (r < 0) {
> + DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> + return r;
> + }
> + }
> +
> + return 0;
> +}
> +
> void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
> {
> mutex_init(&mgr->lock);
> --
> 2.7.4
>
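To make the deadlock this patch avoids concrete, here is a standalone
pthread sketch (a hypothetical userspace analogy, not driver code):
thread A models the CS path, thread B models the GPU reset path, the
condition variable stands in for the fence, and `reservation` stands in
for the BO reservation lock that both paths need:

/* Analogy only: the "fence" can't signal until the "reset" thread gets
 * the reservation lock, so a CS path that waited for the fence while
 * holding that lock would deadlock.  Waiting first, as the patch does,
 * breaks the cycle.  Link with -lpthread. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t reservation = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fence_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fence_cond = PTHREAD_COND_INITIALIZER;
static bool fence_signaled;

static void wait_fence(void)
{
	pthread_mutex_lock(&fence_lock);
	while (!fence_signaled)
		pthread_cond_wait(&fence_cond, &fence_lock);
	pthread_mutex_unlock(&fence_lock);
}

static void *cs_thread(void *arg)
{
	wait_fence();				/* v2: wait BEFORE locking... */
	pthread_mutex_lock(&reservation);	/* ...so reset can get in */
	/* validate BOs, submit the job, ... */
	pthread_mutex_unlock(&reservation);
	return arg;
}

static void *reset_thread(void *arg)
{
	pthread_mutex_lock(&reservation);	/* reset needs the same lock */
	pthread_mutex_lock(&fence_lock);
	fence_signaled = true;			/* only now can the fence signal */
	pthread_cond_broadcast(&fence_cond);
	pthread_mutex_unlock(&fence_lock);
	pthread_mutex_unlock(&reservation);
	return arg;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, cs_thread, NULL);
	pthread_create(&b, NULL, reset_thread, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("no deadlock: the fence wait happened before the lock");
	return 0;
}

Swap the first two statements of cs_thread() and the program can hang
forever: the CS thread then blocks in wait_fence() while holding
`reservation`, which reset_thread() needs before it can ever signal;
that is exactly the ordering the patch eliminates.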