[PATCH 4/5] drm/amdgpu:change sriov_gpu_reset interface
Monk Liu
Monk.Liu at amd.com
Mon May 1 07:22:50 UTC 2017
1,add new parm @job to indicate which ring hang.
2,return when calling to amdgpu_gpu_reset if under SRIOV case
with this patch, we'll reset & recovery on the perticular ring
instead of overkill all ring.
Change-Id: I37dff5c16ef161e3a8425434516ebb285fbe6600
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +-
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +-
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 +-
5 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0c51fb5..157d023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2528,6 +2528,7 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
* amdgpu_sriov_gpu_reset - reset the asic
*
* @adev: amdgpu device pointer
+ * @job: the job lead to hang
* @voluntary: if this reset is requested by guest.
* (true means by guest and false means by HYPERVISOR )
*
@@ -2535,9 +2536,9 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
* for SRIOV case.
* Returns 0 for success or an error on failure.
*/
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, bool voluntary)
{
- int i, r = 0;
+ int i, j, r = 0;
int resched;
struct amdgpu_bo *bo, *tmp;
struct amdgpu_ring *ring;
@@ -2652,7 +2653,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
bool need_full_reset;
if (amdgpu_sriov_vf(adev))
- return amdgpu_sriov_gpu_reset(adev, true);
+ return -EINVAL;
if (!amdgpu_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0209c96..2cc7c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
job->base.sched->name,
atomic_read(&job->ring->fence_drv.last_seq),
job->ring->fence_drv.sync_seq);
- amdgpu_gpu_reset(job->adev);
+
+ if (amdgpu_sriov_vf(job->adev))
+ amdgpu_sriov_gpu_reset(job->adev, job, true);
+ else
+ amdgpu_gpu_reset(job->adev);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index a8ed162..2b641eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -97,7 +97,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, bool voluntary);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index e967a7b..4d71db3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
- amdgpu_sriov_gpu_reset(adev, false);
+ amdgpu_sriov_gpu_reset(adev, NULL, false);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index f0d64f1..a604bd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
- amdgpu_sriov_gpu_reset(adev, false);
+ amdgpu_sriov_gpu_reset(adev, NULL, false);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
--
2.7.4
More information about the amd-gfx
mailing list