[PATCH] drm/amdgpu: avoid duplicated tmo report on same job

Monk Liu Monk.Liu at amd.com
Fri May 10 07:17:48 UTC 2019


Signed-off-by: Monk Liu <Monk.Liu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 12 +++++++++++-
 include/drm/gpu_scheduler.h                |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d6286ed..f1dc0ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3356,8 +3356,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 	return true;
 
 disabled:
-		DRM_INFO("GPU recovery disabled.\n");
-		return false;
+	return false;
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1397942..ca62253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 	struct amdgpu_task_info ti;
+	bool recover;
 
 	memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
@@ -42,6 +43,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 		return;
 	}
 
+	recover = amdgpu_device_should_recover_gpu(ring->adev);
+	if (s_job->sched->last_tmo_id == s_job->id)
+		goto skip_report;
+
+	s_job->sched->last_tmo_id = s_job->id;
 	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
 	DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
 		  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
@@ -49,7 +55,11 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
 		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
-	if (amdgpu_device_should_recover_gpu(ring->adev))
+	if (!recover)
+		DRM_INFO("GPU recovery disabled.\n");
+
+skip_report:
+	if (recover)
 		amdgpu_device_gpu_recover(ring->adev, job);
 }
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9c2a957..1944d27 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -282,6 +282,7 @@ struct drm_gpu_scheduler {
 	int				hang_limit;
 	atomic_t                        num_jobs;
 	bool			ready;
+	uint64_t last_tmo_id;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
-- 
2.7.4



More information about the amd-gfx mailing list