[PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox

Mon May 1 06:34:35 UTC 2017

if sriov gpu reset is invoked by job timeout, it is run
in a global work-queue which is very slow and better not call
msleep ortherwise it takes long time to get back CPU.

so make below changes:

1: Change msleep 1 to mdelay 5
2: Ignore the ack fail from pf after time out,
   because VF FLR will clear ack, sometime VF FLR is done
   prior to the beginning of poll_ack so we can ignore this ack

TODO:
Put job_timedout (and the following gpu reset) in a driver thread,
instead of the global work_struct.

Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 712f36e..e967a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 						     mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 	r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	while (r) {
 		if (timeout <= 0) {
-			pr_err("Doesn't get ack from pf.\n");
+			pr_err("Doesn't get msg:%d from pf.\n", event);
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	}
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	/* start to poll ack */
 	r = xgpu_ai_poll_ack(adev);
 	if (r)
-		return r;
+		pr_err("Doesn't get ack from pf, continue\n");
 
 	xgpu_ai_mailbox_set_valid(adev, false);
 
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 		req == IDH_REQ_GPU_FINI_ACCESS ||
 		req == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
-		if (r)
+		if (r) {
+			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
+		}
 	}
 
 	return 0;
@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					struct amdgpu_iv_entry *entry)
 {
-	DRM_DEBUG("get ack intr and do nothing.\n");
+	printk("get ack intr and do nothing.\n");
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 7bdc51b..f0d64f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
 	}
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_vi_mailbox_rcv_msg(adev, event);
 	}
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
 		request == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r)
-			return r;
+			pr_err("Doesn't get ack from pf, continue\n");
 	}
 
 	return 0;
-- 
2.7.4