[PATCH] drm/amdgpu: Increase tlb flush timeout for sriov
Christian König
ckoenig.leichtzumerken at gmail.com
Wed May 19 10:08:28 UTC 2021
Am 19.05.21 um 11:32 schrieb Chengzhe Liu:
> When there is 12 VF, we need to increase the timeout
NAK, 6 seconds is way to long to wait polling on a fence.
Why should an invalidation take that long? The engine are per VF just to
avoid exactly that problem.
Christian.
>
> Signed-off-by: Chengzhe Liu <ChengZhe.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +++++-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 6 +++++-
> 2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f02dc904e4cf..a5f005c5d0ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint32_t seq;
> uint16_t queried_pasid;
> bool ret;
> + uint32_t sriov_usec_timeout = 6000000; /* wait for 12 * 500ms for SRIOV */
> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> @@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> + if (amdgpu_sriov_vf(adev))
> + r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
> + else
> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> if (r < 1) {
> dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> return -ETIME;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index ceb3968d8326..e4a18d8f75c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> uint32_t seq;
> uint16_t queried_pasid;
> bool ret;
> + uint32_t sriov_usec_timeout = 6000000; /* wait for 12 * 500ms for SRIOV */
> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> @@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> + if (amdgpu_sriov_vf(adev))
> + r = amdgpu_fence_wait_polling(ring, seq, sriov_usec_timeout);
> + else
> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> if (r < 1) {
> dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> up_read(&adev->reset_sem);
More information about the amd-gfx
mailing list