[PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state

Huang Rui ray.huang at amd.com
Fri Apr 20 09:40:55 UTC 2018


"aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
Above patch defers the execution of gfx/compute ib tests. However, at that time,
the gfx may already go into idle state. If "idle" gfx receives command
submission, it will get hang in the system. So we must add is_gfx_on checking at
start of ib tests.

Signed-off-by: Huang Rui <ray.huang at amd.com>
Cc: Shirish S <shirish.s at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 19 ++++++++++++++++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 16 ++--------------
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 59df4b7..a0263b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
 	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
+	bool (*is_gfx_on)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_ngg_buf {
@@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
+#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
 
 /* Common functions */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c2d278..a71d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
+static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -353,6 +365,10 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	uint32_t tmp;
 	long r;
 
+	/* confirm gfx is not in "idle" state */
+	if (!amdgpu_gfx_is_gfx_on(adev))
+		return 0;
+
 	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -1085,7 +1101,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.read_wave_data = &gfx_v9_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
-	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+	.is_gfx_on = &gfx_v9_0_is_gfx_on
 };
 
 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 7712eb6..3553fba 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -254,28 +254,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
 	return smu10_reset_cc6_data(hwmgr);
 }
 
-static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = hwmgr->adev;
-
-	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
-	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
-	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
-		return true;
-
-	return false;
-}
-
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (smu10_data->gfx_off_controled_by_driver) {
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
 
 		/* confirm gfx is back to "on" state */
-		while (!smu10_is_gfx_on(hwmgr))
+		while (!amdgpu_gfx_is_gfx_on(adev))
 			msleep(1);
 	}
 
-- 
2.7.4



More information about the amd-gfx mailing list