[PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4
Andrey Grodzovsky
andrey.grodzovsky at amd.com
Fri Oct 6 18:20:20 UTC 2017
From: Monk Liu <Monk.Liu at amd.com>
We need to unreserve the ttm bo before "cs_add_fence" and "entity_push_job",
otherwise there will be a deadlock between "recover_vram_from_shadow"
and those two routines on the ttm bo's resv lock.
v2:
Add a per-ctx mutex.
v3:
Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
into amdgpu_cs_parser_fini to avoid a nested locking lockup.
Add rollback code for amdgpu_ctx_add_fence in case of error or signal
interruption.
v4:
Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill so that waiting on
old fences can happen before the reservation lock is acquired.
Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 164 ++++++++++++++++++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 +
3 files changed, 100 insertions(+), 69 deletions(-)
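The core of the fix is an ordering change in amdgpu_cs_submit(), shown in full in the hunks below and condensed here as a sketch (surrounding code elided):

    /* old order: the job was published while the validated BOs were
     * still reserved, so a concurrent gpu_reset running
     * recover_vram_from_shadow() could deadlock on the same resv locks
     */
    cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
    amd_sched_entity_push_job(&job->base);
    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);

    /* new order: hook the fence to the BOs and unreserve them first,
     * only then publish the job
     */
    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
    cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
    amd_sched_entity_push_job(&job->base);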
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53d8df3..baa2953 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -724,6 +724,7 @@ struct amdgpu_ctx {
struct dma_fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
+ struct mutex lock;
};
struct amdgpu_ctx_mgr {
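Together with the amdgpu_ctx.c and amdgpu_cs.c hunks below, the lifetime of this new lock is (a condensed sketch of the added lines, not complete functions):

    mutex_init(&ctx->lock);           /* amdgpu_ctx_init() */
    mutex_lock(&p->ctx->lock);        /* amdgpu_cs_parser_init(): held for the whole CS ioctl */
    mutex_unlock(&parser->ctx->lock); /* amdgpu_cs_parser_fini() */
    mutex_destroy(&ctx->lock);        /* amdgpu_ctx_fini() */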
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9f1202a..0fa1bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_chunk;
}
+
+ mutex_lock(&p->ctx->lock);
+
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
@@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
- * @error: error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
**/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
unsigned i;
- if (error && backoff)
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
-
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
drm_syncobj_put(parser->post_dep_syncobjs[i]);
kfree(parser->post_dep_syncobjs);
dma_fence_put(parser->fence);
- if (parser->ctx)
+ if (parser->ctx) {
+ mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
+ }
if (parser->bo_list)
amdgpu_bo_list_put(parser->bo_list);
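Note the ordering in the fini path above: ctx->lock is released before amdgpu_ctx_put(), since the put may drop the last reference and free the ctx, after which the mutex must not be touched.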
@@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_ring *ring = p->job->ring;
- int i, r;
+ int i, j, r;
+
+ for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+
+ struct amdgpu_cs_chunk *chunk;
+ struct amdgpu_ib *ib;
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+
+ chunk = &p->chunks[i];
+ ib = &p->job->ibs[j];
+ chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+ if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+ continue;
+
+ if (p->job->ring->funcs->parse_cs) {
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj = NULL;
+ uint64_t offset;
+ uint8_t *kptr;
+
+ r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+ &aobj, &m);
+ if (r) {
+ DRM_ERROR("IB va_start is invalid\n");
+ return r;
+ }
+
+ if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
+
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r) {
+ return r;
+ }
+
+ offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+ kptr += chunk_ib->va_start - offset;
+
+ r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
+ }
+
+ memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+ amdgpu_bo_kunmap(aobj);
+ } else {
+ r = amdgpu_ib_get(adev, vm, 0, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
+ }
+
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+ j++;
+
+ }
/* Only for UVD/VCE VM emulation */
if (ring->funcs->parse_cs) {
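For clarity, the pointer arithmetic in the parse_cs branch above, with example values (the variable names match the hunk; the numbers are hypothetical):

    /* hypothetical mapping: BO mapped at GPU page 0x100, IB at va 0x100040 */
    offset = 0x100 * AMDGPU_GPU_PAGE_SIZE;   /* m->start * 4096 = 0x100000 */
    kptr  += 0x100040 - offset;              /* lands 0x40 bytes into the kmapped BO */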
@@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
struct amdgpu_cs_parser *parser)
{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
int i, j;
int r, ce_preempt = 0, de_preempt = 0;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_ring *ring;
chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
parser->job->ring = ring;
- if (ring->funcs->parse_cs) {
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- uint64_t offset;
- uint8_t *kptr;
-
- r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
- &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
-
- if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
- return -EINVAL;
- }
-
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
-
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += chunk_ib->va_start - offset;
-
- r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
- } else {
- r = amdgpu_ib_get(adev, vm, 0, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- }
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
j++;
}
@@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_cs_post_dependencies(p);
+
+ /* hook the sched fence to the reservation of every BO in the
+ * validated list and unreserve them.
+ *
+ * We unreserve here because otherwise there would be a deadlock
+ * between ctx_add_fence/sched_entity_push_job and the gpu_reset
+ * routine's recover_bo_from_shadow on the PD/PTEs' ttm bo lock.
+ */
+ ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
+
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
- ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
return 0;
@@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
parser.adev = adev;
parser.filp = filp;
+ fpriv = filp->driver_priv;
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
@@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
}
+ r = amdgpu_cs_ib_fill(adev, &parser);
+ if (r)
+ goto out;
+
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
@@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, &parser);
- if (r)
- goto out;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
@@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_submit(&parser, cs);
out:
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+ if (r && reserved_buffers)
+ ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
+
+ amdgpu_cs_parser_fini(&parser);
return r;
}
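The resulting amdgpu_cs_ioctl() flow after the reordering, sketched with error handling elided (amdgpu_cs_ib_vm_chunk's call site is not visible in this diff, so its position here is inferred from the hunks above):

    amdgpu_cs_parser_init(&parser, data);  /* takes parser.ctx->lock */
    amdgpu_cs_ib_fill(adev, &parser);      /* picks the ring, before any BO is reserved */
    amdgpu_cs_parser_bos(&parser, data);   /* reserves the validated list */
    amdgpu_cs_dependencies(adev, &parser);
    amdgpu_cs_ib_vm_chunk(adev, &parser);  /* IB setup, now done with BOs reserved */
    amdgpu_cs_submit(&parser, cs);         /* unreserves before pushing the job */
    amdgpu_cs_parser_fini(&parser);        /* drops parser.ctx->lock */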
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a11e443..c073a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
if (!ctx->fences)
return -ENOMEM;
+ mutex_init(&ctx->lock);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
&ctx->rings[i].entity);
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+ mutex_destroy(&ctx->lock);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
--
2.7.4