[PATCH 2/6] drm/amdgpu: need some change on vega10 mailbox

Wed May 3 09:05:17 UTC 2017

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> if sriov gpu reset is invoked by job timeout, it is run
> in a global work-queue which is very slow, so it is better not to call
> msleep, otherwise it takes a long time to get the CPU back.
>
> so make below changes:
>
> 1: Change msleep 1 to mdelay 5
> 2: Ignore the ack failure from pf after timeout,
>     because VF FLR will clear the ack; sometimes VF FLR is done
>     prior to the beginning of poll_ack, so we can ignore this ack
>
> TODO:
> Put job_timedout (and the following gpu reset) in a driver thread,
> instead of the global work_struct.
>
> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
>   2 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 712f36e..e967a7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
>   						     mmBIF_BX_PF0_MAILBOX_CONTROL));
> @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
>   	r = xgpu_ai_mailbox_rcv_msg(adev, event);
>   	while (r) {
>   		if (timeout <= 0) {
> -			pr_err("Doesn't get ack from pf.\n");
> +			pr_err("Doesn't get msg:%d from pf.\n", event);
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		r = xgpu_ai_mailbox_rcv_msg(adev, event);
>   	}
> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
>   	/* start to poll ack */
>   	r = xgpu_ai_poll_ack(adev);
>   	if (r)
> -		return r;
> +		pr_err("Doesn't get ack from pf, continue\n");
>   
>   	xgpu_ai_mailbox_set_valid(adev, false);
>   
> @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
>   		req == IDH_REQ_GPU_FINI_ACCESS ||
>   		req == IDH_REQ_GPU_RESET_ACCESS) {
>   		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
> -		if (r)
> +		if (r) {
> +			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
>   			return r;
> +		}
>   	}
>   
>   	return 0;
> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
>   					struct amdgpu_irq_src *source,
>   					struct amdgpu_iv_entry *entry)
>   {
> -	DRM_DEBUG("get ack intr and do nothing.\n");
> +	printk("get ack intr and do nothing.\n");

Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or 
pr_info instead?

Apart from that the patch looks good to me, but I'm not deeply into that stuff.

So with the printk fixed feel free to add an Acked-by: Christian König 
<christian.koenig at amd.com> to it.

Regards,
Christian.

>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index 7bdc51b..f0d64f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
>   	}
> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		r = xgpu_vi_mailbox_rcv_msg(adev, event);
>   	}
> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
>   		request == IDH_REQ_GPU_RESET_ACCESS) {
>   		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>   		if (r)
> -			return r;
> +			pr_err("Doesn't get ack from pf, continue\n");
>   	}
>   
>   	return 0;