[PATCH 6/7] drm/amdgpu: recover hw rings when gpu reset occurs
Chunming Zhou
David1.Zhou@amd.com
Wed Jun 29 08:09:40 UTC 2016
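
Instead of restoring saved ring contents after a reset, replay the
jobs still on the scheduler's mirror list: run each saved job again,
re-emitting its hardware fence with the original sequence number so
that existing waiters still signal, and restart the TDR timer for the
first pending job. The fence slot array grows from num_hw_submission
* 2 to * 4 entries.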
Change-Id: I8e554d34c9e477ea255e0ed2a936397aa5f665e7
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 24 +++++++++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 22 ++++++++++++++++++++++
5 files changed, 43 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 163429c8..03b0fe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -763,6 +763,7 @@ void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
struct fence **f);
+void amdgpu_job_recovery(struct amd_gpu_scheduler *sched);
struct amdgpu_ring {
struct amdgpu_device *adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c4691c..2c8e7f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1994,13 +1994,14 @@ retry:
}
/* restore scratch */
amdgpu_atombios_scratch_regs_restore(adev);
- if (0) {
+ if (!r) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring)
continue;
+ amdgpu_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
- amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
+ kfree(ring_data[i]);
ring_sizes[i] = 0;
ring_data[i] = NULL;
}
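
Note on ordering: the replay has to happen before kthread_unpark(), so
the saved jobs reach the ring ahead of anything the scheduler picks up
next, and the raw ring_data[] backup is now only freed instead of being
written back with amdgpu_ring_restore(). A minimal sketch of the
pairing, assuming the park/backup half earlier in amdgpu_gpu_reset()
(not shown in this hunk) looks roughly like this:

    /* before reset (assumed, untouched by this patch) */
    kthread_park(ring->sched.thread);   /* quiesce the scheduler */
    ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);

    /* after reset (this hunk) */
    amdgpu_job_recovery(&ring->sched);  /* replay mirror-list jobs */
    kthread_unpark(ring->sched.thread); /* resume scheduling */
    kfree(ring_data[i]);                /* backup is no longer replayed */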
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 72bf9f8..8af9903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -366,9 +366,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
(unsigned long)ring);
- ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ ring->fence_drv.num_fences_mask = num_hw_submission * 4 - 1;
spin_lock_init(&ring->fence_drv.lock);
- ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ ring->fence_drv.fences = kcalloc(num_hw_submission * 4, sizeof(void *),
GFP_KERNEL);
if (!ring->fence_drv.fences)
return -ENOMEM;
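
The slot count has to stay a power of two for the seq & num_fences_mask
indexing in amdgpu_fence.c to work, which quadrupling preserves. A toy
illustration (standalone C with example numbers, not driver code):

    #include <stdio.h>

    int main(void)
    {
        unsigned int num_hw_submission = 256;       /* example value only */
        unsigned int slots = num_hw_submission * 4; /* was * 2 before this patch */
        unsigned int mask = slots - 1;  /* valid only for power-of-two sizes */

        /* a fence with sequence number seq lives in fences[seq & mask] */
        printf("seq 1025 -> slot %u\n", 1025u & mask); /* prints slot 1 */
        return 0;
    }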
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e3542..702bd9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -193,14 +193,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (ring->funcs->emit_hdp_invalidate)
amdgpu_ring_emit_hdp_invalidate(ring);
- r = amdgpu_fence_emit(ring, &hwf);
- if (r) {
- dev_err(adev->dev, "failed to emit fence (%d)\n", r);
- if (job && job->vm_id)
- amdgpu_vm_reset_id(adev, job->vm_id);
- amdgpu_ring_undo(ring);
- return r;
- }
+ if (!job || !job->fence) {
+ r = amdgpu_fence_emit(ring, &hwf);
+ if (r) {
+ dev_err(adev->dev, "failed to emit fence (%d)\n", r);
+ if (job && job->vm_id)
+ amdgpu_vm_reset_id(adev, job->vm_id);
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+ } else {
+ /* gpu reset: re-emit the fence with the job's original seqno */
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ job->fence->seqno, AMDGPU_FENCE_FLAG_INT);
+ }
/* wrap the last IB with fence */
if (job && job->uf_bo) {
@@ -212,7 +217,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
if (f)
- *f = fence_get(hwf);
+ *f = (job && job->fence) ? job->fence : fence_get(hwf);
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
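
Reusing job->fence->seqno on replay means fence processing lands on the
same fences[] slot and signals the very struct fence the scheduler and
any waiters already hold; a fresh amdgpu_fence_emit() would create a
second fence object that nothing is waiting on. A toy illustration of
the slot reuse (standalone C, example mask value):

    #include <stdio.h>

    int main(void)
    {
        unsigned int mask = 1023;    /* num_fences_mask, example value */
        unsigned int orig_seq = 517; /* seqno assigned at first submission */

        /* first submission and replay hit the same slot, hence the
         * same fence object gets signaled */
        printf("first submit -> slot %u\n", orig_seq & mask);
        printf("replay       -> slot %u\n", orig_seq & mask);
        return 0;
    }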
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 83771c1..32fad1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -39,6 +39,28 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
amdgpu_gpu_reset(job->adev);
}
+void amdgpu_job_recovery(struct amd_gpu_scheduler *sched)
+{
+ struct amd_sched_job *s_job, *tmp;
+
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
+ if (job->vm) {
+ struct amdgpu_vm_id *id = &job->adev->vm_manager.ids[job->vm_id];
+ job->vm_pd_addr = amdgpu_bo_gpu_offset(job->vm->page_directory);
+ id->pd_gpu_addr = job->vm_pd_addr;
+ }
+ sched->ops->run_job(s_job);
+ }
+ s_job = list_first_entry_or_null(&sched->ring_mirror_list,
+ struct amd_sched_job, node);
+ if (s_job)
+ schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+
+ spin_unlock(&sched->job_list_lock);
+}
+
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm)
{
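
Two notes on amdgpu_job_recovery(): it assumes the scheduler thread is
still parked (the amdgpu_device.c hunk calls it right before
kthread_unpark()), and it rearms the TDR delayed work for the first job
left on the mirror list so that a replayed job hanging again is caught.
One caveat: run_job() is invoked here with job_list_lock held; if a
backend's run_job can sleep, the lock would have to be dropped around
that call.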
--
1.9.1