[PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore

Tue Jan 21 22:41:55 UTC 2020

Thanks for the reviews. I pushed patches 1 and 2 because they're obvious
fixes. They have passed local testing on a Vega10. I'm still waiting for 
an opportunity to test the re-worked workaround in patch 3 on a 
problematic system with Vega20 and XGMI.

Regards,
   Felix

On 2020-01-17 8:37 p.m., Felix Kuehling wrote:
> Use a more meaningful variable name for the invalidation request
> that is distinct from the tmp variable that gets overwritten when
> acquiring the invalidation semaphore.
>
> Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10")
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++----
>   2 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 86f4ffe408e7..d914555e1212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>   	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
> +	u32 tmp;
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
>   	unsigned int i;
> @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 54bdc1786ab1..6d95de1413c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   {
>   	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
>   	const unsigned eng = 17;
> -	u32 j, tmp;
> +	u32 j, inv_req, tmp;
>   	struct amdgpu_vmhub *hub;
>   
>   	BUG_ON(vmhub >= adev->num_vmhubs);
>   
>   	hub = &adev->vmhub[vmhub];
> -	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
> +	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
>   
>   	/* This is necessary for a HW workaround under SRIOV as well
>   	 * as GFXOFF under bare metal
> @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		uint32_t req = hub->vm_inv_eng0_req + eng;
>   		uint32_t ack = hub->vm_inv_eng0_ack + eng;
>   
> -		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
> +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
>   				1 << vmid);
>   		return;
>   	}
> @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>   	}
>   
> -	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
>   
>   	/*
>   	 * Issue a dummy read to wait for the ACK register to be cleared