[PATCH] drm/amdgpu/gmc9: give more chance for tlb flush if failed(v2)

Wed Apr 4 08:00:04 UTC 2018

Am 04.04.2018 um 07:01 schrieb Emily Deng:
> Under SR-IOV, a CPU-based TLB flush sometimes times out within
> the given 100ms period. Instead of letting it fail and continuing, we can
> give it more chances by repeating the TLB flush on the failed VMHUB.
>
> This could fix the massive number of "Timeout waiting for VM flush ACK"
> errors seen during the vk_encoder test.
>
> v2: refine the code
>
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>

Acked-by: Christian König <christian.koenig at amd.com>

But that is still a rather ugly workaround; we should probably not
upstream it and should wait for the RLC fix instead.

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 50 ++++++++++++++++++++---------------
>   1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 503070f..44602d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -328,7 +328,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   {
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
> -	unsigned i, j;
> +	unsigned retry = 3;
> +	unsigned i, j, k;
>   
>   	spin_lock(&adev->gmc.invalidate_lock);
>   
> @@ -336,31 +337,36 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>   
> -		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +		for (k = 0; k < retry; ++k) {
>   
> -		/* Busy wait for ACK.*/
> -		for (j = 0; j < 100; j++) {
> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -			tmp &= 1 << vmid;
> -			if (tmp)
> +			WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +
> +			/* Busy wait for ACK.*/
> +			for (j = 0; j < 100; j++) {
> +				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +				tmp &= 1 << vmid;
> +				if (tmp)
> +					break;
> +				cpu_relax();
> +			}
> +			if (j < 100)
>   				break;
> -			cpu_relax();
> -		}
> -		if (j < 100)
> -			continue;
> -
> -		/* Wait for ACK with a delay.*/
> -		for (j = 0; j < adev->usec_timeout; j++) {
> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -			tmp &= 1 << vmid;
> -			if (tmp)
> +
> +			/* Wait for ACK with a delay.*/
> +			for (j = 0; j < adev->usec_timeout; j++) {
> +				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +				tmp &= 1 << vmid;
> +				if (tmp)
> +					break;
> +				udelay(1);
> +			}
> +			if (j < adev->usec_timeout)
>   				break;
> -			udelay(1);
> +			if (k == retry)
> +				DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> +			else
> +				DRM_ERROR("Need one more try to write the VMHUB flush request!");
>   		}
> -		if (j < adev->usec_timeout)
> -			continue;
> -
> -		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   	}
>   
>   	spin_unlock(&adev->gmc.invalidate_lock);