[PATCH] drm/amdgpu: fix concurrent VM flushes on Vega/Navi v2

Fri Apr 23 12:50:21 UTC 2021

Reviewed-by: James Zhu <James.Zhu at amd.com>
Tested-by: James Zhu <James.Zhu at amd.com>

James

On 2021-04-23 5:29 a.m., Christian König wrote:
> Starting with Vega the hardware supports concurrent flushes
> of VMID which can be used to implement per process VMID
> allocation.
>
> But concurrent flushes are mutual exclusive with back to
> back VMID allocations, fix this to avoid a VMID used in
> two ways at the same time.
>
> v2: don't set ring to NULL
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 19 +++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  6 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
>   3 files changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 94b069630db3..b4971e90b98c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -215,7 +215,11 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
>   	/* Check if we have an idle VMID */
>   	i = 0;
>   	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
> -		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
> +		/* Don't use per engine and per process VMID at the same time */
> +		struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
> +			NULL : ring;
> +
> +		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
>   		if (!fences[i])
>   			break;
>   		++i;
> @@ -281,7 +285,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
>   	if (updates && (*id)->flushed_updates &&
>   	    updates->context == (*id)->flushed_updates->context &&
>   	    !dma_fence_is_later(updates, (*id)->flushed_updates))
> -	    updates = NULL;
> +		updates = NULL;
>   
>   	if ((*id)->owner != vm->immediate.fence_context ||
>   	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
> @@ -290,6 +294,10 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
>   	     !dma_fence_is_signaled((*id)->last_flush))) {
>   		struct dma_fence *tmp;
>   
> +		/* Don't use per engine and per process VMID at the same time */
> +		if (adev->vm_manager.concurrent_flush)
> +			ring = NULL;
> +
>   		/* to prevent one context starved by another context */
>   		(*id)->pd_gpu_addr = 0;
>   		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
> @@ -365,12 +373,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
>   		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
>   			needs_flush = true;
>   
> -		/* Concurrent flushes are only possible starting with Vega10 and
> -		 * are broken on Navi10 and Navi14.
> -		 */
> -		if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
> -				    adev->asic_type == CHIP_NAVI10 ||
> -				    adev->asic_type == CHIP_NAVI14))
> +		if (needs_flush && !adev->vm_manager.concurrent_flush)
>   			continue;
>   
>   		/* Good, we can use this VMID. Remember this submission as
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f95bcda8463f..adc4481b05fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3149,6 +3149,12 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   {
>   	unsigned i;
>   
> +	/* Concurrent flushes are only possible starting with Vega10 and
> +	 * are broken on Navi10 and Navi14.
> +	 */
> +	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
> +					      adev->asic_type == CHIP_NAVI10 ||
> +					      adev->asic_type == CHIP_NAVI14);
>   	amdgpu_vmid_mgr_init(adev);
>   
>   	adev->vm_manager.fence_context =
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 848e175e99ff..163763f8b418 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -331,6 +331,7 @@ struct amdgpu_vm_manager {
>   	/* Handling of VMIDs */
>   	struct amdgpu_vmid_mgr			id_mgr[AMDGPU_MAX_VMHUBS];
>   	unsigned int				first_kfd_vmid;
> +	bool					concurrent_flush;
>   
>   	/* Handling of VM fences */
>   	u64					fence_context;