[PATCH v2] drm/amdgpu: fix the ib test hang when gfx is in "idle" state

Huang Rui ray.huang at amd.com
Tue Apr 24 02:49:32 UTC 2018


"aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
Above patch defers the execution of gfx/compute ib tests. However, at that time,
the gfx may already go into idle state. If "idle" gfx receives command
submission, then will get hang in the system. And it still has issue to
dynamically enable/disable gfxoff at runtime. So we have to use a workaround to
skip the gfx/compute ib tests when gfx is already in "idle" state.

Signed-off-by: Huang Rui <ray.huang at amd.com>
Cc: Shirish S <shirish.s at amd.com>
---

Changes from V1 -> V2:
- Remove unused definitions of smu10_hwmgr.
- Add WA descriptions into commit log and comments.

---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 23 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 22 ++--------------------
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 59df4b7..a0263b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
 	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
+	bool (*is_gfx_on)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_ngg_buf {
@@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
+#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
 
 /* Common functions */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2c5e2a4..b8bd194 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
+static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -353,6 +365,14 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	uint32_t tmp;
 	long r;
 
+	/*
+	 * FIXME: It still has issue to dynamically enable/disable gfxoff at
+	 * runtime. So it has to skip the gfx/compute ib test when gfx is
+	 * already in "idle" state.
+	 */
+	if (!amdgpu_gfx_is_gfx_on(adev))
+		return 0;
+
 	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -1085,7 +1105,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.read_wave_data = &gfx_v9_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
-	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+	.is_gfx_on = &gfx_v9_0_is_gfx_on
 };
 
 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 7712eb6..48c17fb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -42,12 +42,6 @@
 #define SMU10_DISPCLK_BYPASS_THRESHOLD     10000 /* 100Mhz */
 #define SMC_RAM_END                     0x40000
 
-#define mmPWR_MISC_CNTL_STATUS					0x0183
-#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
 
 static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;
 
@@ -254,28 +248,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
 	return smu10_reset_cc6_data(hwmgr);
 }
 
-static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = hwmgr->adev;
-
-	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
-	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
-	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
-		return true;
-
-	return false;
-}
-
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (smu10_data->gfx_off_controled_by_driver) {
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
 
 		/* confirm gfx is back to "on" state */
-		while (!smu10_is_gfx_on(hwmgr))
+		while (!amdgpu_gfx_is_gfx_on(adev))
 			msleep(1);
 	}
 
-- 
2.7.4



More information about the amd-gfx mailing list