[PATCH] drm/amdgpu: skip scheduling IBs when GPU recovery

Dennis Li Dennis.Li at amd.com
Fri Aug 21 08:57:16 UTC 2020


If GPU begin to do recovery, skip scheduling IBs. Otherwise
GPU recovery randomly fail.

Signed-off-by: Dennis Li <Dennis.Li at amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index dcfe8a3b03ff..054d7b0357fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -212,6 +212,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence = NULL, *finished;
 	struct amdgpu_job *job;
 	int r = 0;
+	int locked;
 
 	job = to_amdgpu_job(sched_job);
 	finished = &job->base.s_fence->finished;
@@ -220,6 +221,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 
 	trace_amdgpu_sched_run_job(job);
 
+	locked = down_read_trylock(&ring->adev->reset_sem);
+	if (!locked)
+		dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if GPU recovery */
+
 	if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
 		dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
 
@@ -231,6 +236,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 		if (r)
 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
 	}
+
+	if (locked)
+		up_read(&ring->adev->reset_sem);
+
 	/* if gpu reset, hw fence will be replaced here */
 	dma_fence_put(job->fence);
 	job->fence = dma_fence_get(fence);
-- 
2.17.1



More information about the amd-gfx mailing list