[RFC PATCH 1/2] drm/amdkfd: Move TLB flushing logic into amdgpu
Christian König
christian.koenig at amd.com
Mon Mar 13 07:36:53 UTC 2023
On 10.03.23 at 23:16, Felix Kuehling wrote:
> This will make it possible for amdgpu GEM ioctls to flush TLBs on compute
> VMs.
>
> This removes VMID-based TLB flushing and always uses PASID-based
> flushing. The no-HWS case still works because the PASID-based flush
> scans the VMID-PASID mapping registers to find the right VMID. It's
> only slightly less efficient, and no-HWS is not a production use case.
On the one hand it is nice that we can now flush based on the PASID
without the NO_HWS dependency (I hope that was intentional).

On the other hand, I really don't like having variables in the
amdgpu_vm structure that the VM code itself doesn't work with. That
already backfired with pd_phys_addr before.
Regards,
Christian.
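
For reference, the PASID-based path the commit message describes boils
down to walking the hardware VMID slots and checking which PASID each
one currently maps. A simplified sketch of that lookup (not the actual
gmc code; example_read_vmid_pasid_mapping() is a made-up helper standing
in for the per-ASIC register read, and error handling is omitted):

/* Simplified illustration of a PASID-based TLB flush. */
static int example_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
                                        uint16_t pasid, uint32_t flush_type,
                                        bool all_hub)
{
        unsigned int vmid, hub;

        /* Walk the hardware VMID slots and find the one this PASID is
         * currently mapped to.
         */
        for (vmid = 1; vmid < 16; vmid++) {
                uint16_t mapped_pasid;

                if (!example_read_vmid_pasid_mapping(adev, vmid, &mapped_pasid))
                        continue;
                if (mapped_pasid != pasid)
                        continue;

                /* Flush either all VM hubs or just the GFX hub. */
                if (all_hub) {
                        for (hub = 0; hub < adev->num_vmhubs; hub++)
                                amdgpu_gmc_flush_gpu_tlb(adev, vmid, hub, flush_type);
                } else {
                        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type);
                }
                return 0;
        }

        /* No VMID currently assigned to this PASID, nothing to flush. */
        return 0;
}

The register scan is what makes the PASID-based flush slightly less
efficient than flushing a known VMID directly.
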
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 30 +++++++++-------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++++--
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 28 --------------------
> 5 files changed, 22 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 8816853e50c0..dcbd28e99b5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -726,31 +726,25 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> return false;
> }
>
> -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
> - uint16_t vmid)
> -{
> - if (adev->family == AMDGPU_FAMILY_AI) {
> - int i;
> -
> - for (i = 0; i < adev->num_vmhubs; i++)
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
> - } else {
> - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
> - }
> -
> - return 0;
> -}
> -
> -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
> +int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + enum TLB_FLUSH_TYPE type)
> {
> + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
> bool all_hub = false;
>
> + /*
> + * It can be that we race and lose here, but that is extremely unlikely
> + * and the worst thing which could happen is that we flush the changes
> + * into the TLB once more which is harmless.
> + */
> + if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
> + return 0;
> +
> if (adev->family == AMDGPU_FAMILY_AI ||
> adev->family == AMDGPU_FAMILY_RV)
> all_hub = true;
>
> - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
> + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, type, all_hub);
> }
>
> bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 01ba3589b60a..dcaf69fd833c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -157,10 +157,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
> uint32_t *ib_cmd, uint32_t ib_len);
> void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
> bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
> -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
> - uint16_t vmid);
> -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
> +int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + enum TLB_FLUSH_TYPE type);
>
> bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 03a3314e5b43..171de7da2959 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -290,6 +290,7 @@ struct amdgpu_vm {
> /* Last finished delayed update */
> atomic64_t tlb_seq;
> struct dma_fence *last_tlb_flush;
> + atomic64_t kfd_last_flushed_seq;
>
> /* Last unlocked submission to the scheduler entities */
> struct dma_fence *last_unlocked;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index bfa30d12406b..e029129308e7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -715,7 +715,6 @@ struct kfd_process_device {
> /* VM context for GPUVM allocations */
> struct file *drm_file;
> void *drm_priv;
> - atomic64_t tlb_seq;
>
> /* GPUVM allocations storage */
> struct idr alloc_idr;
> @@ -1344,7 +1343,13 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>
> void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>
> -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
> +static inline void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
> +{
> + struct amdgpu_device *adev = pdd->dev->adev;
> + struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
> +
> + amdgpu_amdkfd_flush_tlb(adev, vm, type);
> +}
>
> static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ebabe92f7edb..48d7d30eeb24 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1591,7 +1591,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
> return ret;
> }
> pdd->drm_priv = drm_file->private_data;
> - atomic64_set(&pdd->tlb_seq, 0);
>
> ret = kfd_process_device_reserve_ib_mem(pdd);
> if (ret)
> @@ -1994,33 +1993,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
> }
>
> -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
> -{
> - struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
> - uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
> - struct kfd_dev *dev = pdd->dev;
> -
> - /*
> - * It can be that we race and lose here, but that is extremely unlikely
> - * and the worst thing which could happen is that we flush the changes
> - * into the TLB once more which is harmless.
> - */
> - if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
> - return;
> -
> - if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> - /* Nothing to flush until a VMID is assigned, which
> - * only happens when the first queue is created.
> - */
> - if (pdd->qpd.vmid)
> - amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
> - pdd->qpd.vmid);
> - } else {
> - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
> - pdd->process->pasid, type);
> - }
> -}
> -
> struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
> {
> int i;
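
With the sequence number tracked in amdgpu_vm, repeated flush requests
collapse into one: a call site only performs a real flush when
amdgpu_vm_tlb_seq() has advanced since the last flush. A minimal,
hypothetical caller sketch (the unmap path is abbreviated, not the real
ioctl code):

/* Hypothetical, abbreviated caller: heavyweight flush after unmapping
 * memory from a GPU. If the VM's TLB sequence hasn't advanced since the
 * last flush, the atomic64_xchg() check in amdgpu_amdkfd_flush_tlb()
 * makes this a no-op.
 */
static void example_unmap_and_flush(struct kfd_process_device *pdd)
{
        if (kfd_flush_tlb_after_unmap(pdd->dev))
                kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
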