[PATCH] drm/amdgpu: fix TLB flushing during eviction
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Mar 31 06:27:17 UTC 2022
On 2022-03-30 22:51, philip yang wrote:
>
>
> On 2022-03-30 05:00, Christian König wrote:
>> Testing the valid bit is not enough to figure out if we
>> need to invalidate the TLB or not.
>>
>> During eviction it is quite likely that we move a BO from VRAM to GTT and
>> update the page tables immediately to the new GTT address.
>>
>> Rework the whole function to get all the necessary parameters directly as
>> value.
>>
>> Signed-off-by: Christian König<christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9992a7311387..1cac90ee3318 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>> }
>>
>> /**
>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>> + * amdgpu_vm_update_range - update a range in the vm page table
>> *
>> - * @adev: amdgpu_device pointer of the VM
>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>> - * @vm: requested vm
>> + * @adev: amdgpu_device pointer to use for commands
>> + * @vm: the VM to update the range
>> * @immediate: immediate submission in a page fault
>> * @unlocked: unlocked invalidation during MM callback
>> + * @flush_tlb: trigger tlb invalidation after update completed
>> * @resv: fences we need to sync to
>> * @start: start of mapped range
>> * @last: last mapped entry
>> * @flags: flags for the entries
>> * @offset: offset into nodes and pages_addr
>> + * @vram_base: base for vram mappings
>> * @res: ttm_resource to map
>> * @pages_addr: DMA addresses to use for mapping
>> * @fence: optional resulting fence
>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>> * Fill in the page table entries between @start and @last.
>> *
>> * Returns:
>> - * 0 for success, -EINVAL for failure.
>> + * 0 for success, negative error code for failure.
>> */
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> - struct amdgpu_device *bo_adev,
>> - struct amdgpu_vm *vm, bool immediate,
>> - bool unlocked, struct dma_resv *resv,
>> - uint64_t start, uint64_t last,
>> - uint64_t flags, uint64_t offset,
>> - struct ttm_resource *res,
>> - dma_addr_t *pages_addr,
>> - struct dma_fence **fence)
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> + bool immediate, bool unlocked, bool flush_tlb,
>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>> + struct dma_fence **fence)
>> {
>> struct amdgpu_vm_update_params params;
>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> }
>>
>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> - addr = bo_adev->vm_manager.vram_base_offset +
>> - cursor.start;
>> + addr = vram_base + cursor.start;
>> } else {
>> addr = 0;
>> }
>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>
>> r = vm->update_funcs->commit(&params, fence);
>>
>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>> + if (flush_tlb || params.table_freed) {
>
> All changes look good to me, but when I look at the previous changes
> again, kfd_ioctl_map_memory_to_gpu is not correct now, as it should
> flush the TLB if (!kfd_flush_tlb_after_unmap(dev)).
>
That was intentionally dropped as Felix said it wouldn't be necessary
any more with the TLB flush rework.

Is that really causing any trouble?

Christian.
> To fix it, it seems we need the change below (rough sketch further
> down), then pass the flush_tlb flag via
> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte ->
> amdgpu_vm_bo_update:
>
> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> -			bool clear)
>
> -	bool flush_tlb = clear;
>
> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> +			bool clear, bool flush_tlb)
>
> +	flush_tlb |= clear;
>
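> A rough, untested sketch of the idea (only the two ends of the call
> chain are shown; how the intermediate map_bo_to_gpuvm() and
> update_gpuvm_pte() helpers forward the flag is purely illustrative):
>
>	/* In kfd_ioctl_map_memory_to_gpu(): flush on map only when the
>	 * device does not flush the TLB after unmap anyway.
>	 */
>	bool flush_tlb = !kfd_flush_tlb_after_unmap(dev);
>
>	/* ... handed down through map_bo_to_gpuvm() -> update_gpuvm_pte() ... */
>
>	/* and finally the call into the VM code: */
>	r = amdgpu_vm_bo_update(adev, bo_va, clear, flush_tlb);
>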
> Regards,
>
> Philip
>
>> tlb_cb->vm = vm;
>> if (!fence || !*fence ||
>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> dma_addr_t *pages_addr = NULL;
>> struct ttm_resource *mem;
>> struct dma_fence **last_update;
>> + bool flush_tlb = clear;
>> struct dma_resv *resv;
>> + uint64_t vram_base;
>> uint64_t flags;
>> - struct amdgpu_device *bo_adev = adev;
>> int r;
>>
>> if (clear || !bo) {
>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> }
>>
>> if (bo) {
>> + struct amdgpu_device *bo_adev = adev;
>> +
>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>
>> if (amdgpu_bo_encrypted(bo))
>> flags |= AMDGPU_PTE_TMZ;
>>
>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>> } else {
>> flags = 0x0;
>> + vram_base = 0;
>> }
>>
>> if (clear || (bo && bo->tbo.base.resv ==
>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> last_update = &bo_va->last_pt_update;
>>
>> if (!clear && bo_va->base.moved) {
>> - bo_va->base.moved = false;
>> + flush_tlb = true;
>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>
>> } else if (bo_va->cleared != clear) {
>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>
>> trace_amdgpu_vm_bo_update(mapping);
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> - resv, mapping->start,
>> - mapping->last, update_flags,
>> - mapping->offset, mem,
>> - pages_addr, last_update);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>> + resv, mapping->start, mapping->last,
>> + update_flags, mapping->offset,
>> + vram_base, mem, pages_addr,
>> + last_update);
>> if (r)
>> return r;
>> }
>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>
>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>> bo_va->cleared = clear;
>> + bo_va->base.moved = false;
>>
>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>> list_for_each_entry(mapping, &bo_va->valids, list)
>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> mapping->start < AMDGPU_GMC_HOLE_START)
>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>> - resv, mapping->start,
>> - mapping->last, init_pte_value,
>> - 0, NULL, NULL, &f);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
>> + mapping->start, mapping->last,
>> + init_pte_value, 0, 0, NULL, NULL,
>> + &f);
>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>> if (r) {
>> dma_fence_put(f);
>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>> goto error_unlock;
>> }
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
>> - addr, flags, value, NULL, NULL, NULL);
>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
>> + addr, flags, value, 0, NULL, NULL, NULL);
>> if (r)
>> goto error_unlock;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 6b7682fe84f8..1a814fbffff8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm);
>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> - struct amdgpu_device *bo_adev,
>> - struct amdgpu_vm *vm, bool immediate,
>> - bool unlocked, struct dma_resv *resv,
>> - uint64_t start, uint64_t last,
>> - uint64_t flags, uint64_t offset,
>> - struct ttm_resource *res,
>> - dma_addr_t *pages_addr,
>> - struct dma_fence **fence);
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> + bool immediate, bool unlocked, bool flush_tlb,
>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>> + struct dma_fence **fence);
>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>> struct amdgpu_bo_va *bo_va,
>> bool clear);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 27533f6057e0..907b02045824 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>
>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>
>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
>> - start, last, init_pte_value, 0,
>> - NULL, NULL, fence);
>> + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
>> + last, init_pte_value, 0, 0, NULL, NULL,
>> + fence);
>> }
>>
>> static int
>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>> pte_flags);
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> - NULL, last_start,
>> - prange->start + i, pte_flags,
>> - last_start - prange->start,
>> - NULL, dma_addr,
>> - &vm->last_update);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
>> + last_start, prange->start + i,
>> + pte_flags,
>> + last_start - prange->start,
>> + bo_adev->vm_manager.vram_base_offset,
>> + NULL, dma_addr, &vm->last_update);
>>
>> for (j = last_start - prange->start; j <= i; j++)
>> dma_addr[j] |= last_domain;