[PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves

Wed Feb 7 14:47:03 UTC 2024

Am 01.02.24 um 17:50 schrieb Philip Yang:
> SVM migration unmap pages from GPU and then update mapping to GPU to
> recover page fault. Currently unmap clears the PDE entry for range
> length >= huge page and free PTB bo, update mapping to alloc new PT bo.
> There is race bug that the freed entry bo maybe still on the pt_free
> list, reused when updating mapping and then freed, leave invalid PDE
> entry and cause GPU page fault.

Well that race here is not clear to me. Can you explain more what's 
going on and why we should change the VM code to avoid this situation?

>
> By setting the update to clear only one PDE entry or clear PTB, to
> avoid unmap to free PTE bo. This fixes the race bug and improve the
> unmap and map to GPU performance. Update mapping to huge page will
> still free the PTB bo.

I would only do this if we don't have any other way to clean this up 
since that is a really ugly workaround for this issue.

Regards,
Christian.

>
> With this change, the vm->pt_freed list and work is not needed. Add
> WARN_ON(unlocked) in amdgpu_vm_pt_free_dfs to catch if unmap to free the
> PTB.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 43 ++++++-----------------
>   3 files changed, 10 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 82e5fd66a10d..3bde77dfc63f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2256,8 +2256,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	spin_lock_init(&vm->status_lock);
>   	INIT_LIST_HEAD(&vm->freed);
>   	INIT_LIST_HEAD(&vm->done);
> -	INIT_LIST_HEAD(&vm->pt_freed);
> -	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>   	INIT_KFIFO(vm->faults);
>   
>   	r = amdgpu_vm_init_entities(adev, vm);
> @@ -2446,8 +2444,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>   
> -	flush_work(&vm->pt_free_work);
> -
>   	root = amdgpu_bo_ref(vm->root.bo);
>   	amdgpu_bo_reserve(root, true);
>   	amdgpu_vm_set_pasid(adev, vm, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index cdb61f1e7c35..74fe211b9ecd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -316,10 +316,6 @@ struct amdgpu_vm {
>   	/* BOs which are invalidated, has been updated in the PTs */
>   	struct list_head        done;
>   
> -	/* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
> -	struct list_head	pt_freed;
> -	struct work_struct	pt_free_work;
> -
>   	/* contains the page directory */
>   	struct amdgpu_vm_bo_base     root;
>   	struct dma_fence	*last_update;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index a160265ddc07..a3d609655ce3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -657,27 +657,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>   	amdgpu_bo_unref(&entry->bo);
>   }
>   
> -void amdgpu_vm_pt_free_work(struct work_struct *work)
> -{
> -	struct amdgpu_vm_bo_base *entry, *next;
> -	struct amdgpu_vm *vm;
> -	LIST_HEAD(pt_freed);
> -
> -	vm = container_of(work, struct amdgpu_vm, pt_free_work);
> -
> -	spin_lock(&vm->status_lock);
> -	list_splice_init(&vm->pt_freed, &pt_freed);
> -	spin_unlock(&vm->status_lock);
> -
> -	/* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
> -	amdgpu_bo_reserve(vm->root.bo, true);
> -
> -	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
> -		amdgpu_vm_pt_free(entry);
> -
> -	amdgpu_bo_unreserve(vm->root.bo);
> -}
> -
>   /**
>    * amdgpu_vm_pt_free_dfs - free PD/PT levels
>    *
> @@ -696,17 +675,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>   	struct amdgpu_vm_pt_cursor cursor;
>   	struct amdgpu_vm_bo_base *entry;
>   
> -	if (unlocked) {
> -		spin_lock(&vm->status_lock);
> -		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> -			list_move(&entry->vm_status, &vm->pt_freed);
> -
> -		if (start)
> -			list_move(&start->entry->vm_status, &vm->pt_freed);
> -		spin_unlock(&vm->status_lock);
> -		schedule_work(&vm->pt_free_work);
> -		return;
> -	}
> +	WARN_ON(unlocked);
>   
>   	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>   		amdgpu_vm_pt_free(entry);
> @@ -1009,7 +978,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>   		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
>   		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
>   		pe_start = ((cursor.pfn >> shift) & mask) * 8;
> -		entry_end = ((uint64_t)mask + 1) << shift;
> +
> +		if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
> +			/*
> +			 * Unmap to clear one PDE entry, to avoid unmap to free
> +			 * PTB using pt_free work which has race condition.
> +			 */
> +			entry_end = 1ULL << shift;
> +		else
> +			entry_end = ((uint64_t)mask + 1) << shift;
>   		entry_end += cursor.pfn & ~(entry_end - 1);
>   		entry_end = min(entry_end, end);
>