[PATCH] drm/amdgpu: skip scheduling IBs during GPU recovery
Dennis Li
Dennis.Li at amd.com
Fri Aug 21 08:57:16 UTC 2020
If the GPU has begun recovery, skip scheduling IBs. Otherwise,
GPU recovery randomly fails.
Signed-off-by: Dennis Li <Dennis.Li at amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index dcfe8a3b03ff..054d7b0357fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -212,6 +212,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r = 0;
+ int locked;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@@ -220,6 +221,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
trace_amdgpu_sched_run_job(job);
+ locked = down_read_trylock(&ring->adev->reset_sem);
+ if (!locked)
+ dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if GPU recovery */
+
if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
@@ -231,6 +236,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
+
+ if (locked)
+ up_read(&ring->adev->reset_sem);
+
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);
--
2.17.1
More information about the amd-gfx
mailing list