[PATCH 10/12] drm/amdgpu/sriov: implement guilty ctx for loose reset
Monk Liu
Monk.Liu at amd.com
Sat Sep 30 06:03:50 UTC 2017
Change-Id: I7904f362aa0f578a5cbf5d40c7a242c2c6680a92
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 +
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 22 ++++++++++++++++++++++
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 1 +
5 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b40d4ba..b63e602 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -737,6 +737,7 @@ struct amdgpu_ctx {
struct dma_fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
+ bool guilty;
};
struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6a1515e..f92962e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -79,16 +79,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (cs->in.num_chunks == 0)
return 0;
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(p->adev) &&
+ amdgpu_sriov_reset_level == 0 &&
+ p->ctx->guilty)
+ return -ENODEV;
+
chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
- p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
- if (!p->ctx) {
- ret = -EINVAL;
- goto free_chunk;
- }
-
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
@@ -184,7 +187,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->nchunks = 0;
put_ctx:
amdgpu_ctx_put(p->ctx);
-free_chunk:
kfree(chunk_array);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 75c933b..028e9f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -60,6 +60,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
rq, amdgpu_sched_jobs);
if (r)
goto failed;
+ ctx->rings[i].entity.guilty = &ctx->guilty;
}
r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 12c3092..89b0573 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -493,10 +493,32 @@ void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched)
void amd_sched_job_kickout(struct amd_sched_job *s_job)
{
struct amd_gpu_scheduler *sched = s_job->sched;
+ struct amd_sched_entity *entity, *tmp;
+ struct amd_sched_rq *rq;
+ int i;
+ bool found = false; /* must start false: it is tested after every run queue scan below */
spin_lock(&sched->job_list_lock);
list_del_init(&s_job->node);
spin_unlock(&sched->job_list_lock);
+
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME); /* flag the job's finished fence with -ETIME */
+
+ for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_KERNEL; i++) { /* only priorities below KERNEL are scanned */
+ rq = &sched->sched_rq[i];
+
+ spin_lock(&rq->lock);
+ list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+ if (s_job->s_entity == entity && entity->guilty) { /* entity owns this job and has a guilty flag attached */
+ *entity->guilty = true; /* written through the pointer, which amdgpu_ctx_init aims at ctx->guilty */
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&rq->lock);
+ if (found) /* owning entity already handled; skip the remaining run queues */
+ break;
+ }
}
void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index f0242aa..16c2244 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -49,6 +49,7 @@ struct amd_sched_entity {
struct dma_fence *dependency;
struct dma_fence_cb cb;
+ bool *guilty; /* this points to ctx's guilty */
};
/**
--
2.7.4
More information about the amd-gfx
mailing list