[PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush

Fri Mar 1 13:29:24 UTC 2024

Am 01.03.24 um 12:07 schrieb Shashank Sharma:
> The idea behind this patch is to delay the freeing of PT entry objects
> until the TLB flush is done.
>
> This patch:
> - Adds a tlb_flush_waitlist which will keep the objects that need to be
>    freed after tlb_flush
> - Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of freeing
>    them immediately.
> - Exports function amdgpu_vm_pt_free to be called dircetly.
> - Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
>    between immediate freeing of the BOs (like from
>    amdgpu_vm_pt_free_root) vs delayed freeing.
>
> V2: rebase
> V4: (Christian)
>      - add only locked PTEs entries in TLB flush waitlist.
>      - do not create a separate function for list flush.
>      - do not create a new lock for TLB flush.
>      - there is no need to wait on tlb_flush_fence exclusively.
>
> Cc: Christian König <Christian.Koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Felix Kuehling <felix.kuehling at amd.com>
> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 10 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++-------
>   3 files changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 310aae6fb49b..94581a1fe34f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   
>   	/* Prepare a TLB flush fence to be attached to PTs */
>   	if (!unlocked && params.needs_flush && vm->is_compute_context) {
> +		struct amdgpu_vm_bo_base *entry, *next;
> +
>   		amdgpu_vm_tlb_fence_create(adev, vm, fence);
>   
>   		/* Makes sure no PD/PT is freed before the flush */
>   		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
>   				   DMA_RESV_USAGE_BOOKKEEP);
> +
> +		if (list_empty(&vm->tlb_flush_waitlist))
> +			goto error_unlock;
> +
> +		/* Now actually free the waitlist */
> +		list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
> +			amdgpu_vm_pt_free(entry);

This needs to be outside of the "if". We also need to free the PDs/PTs 
in non compute VMs.

>   	}
>   
>   error_unlock:
> @@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	INIT_LIST_HEAD(&vm->pt_freed);
>   	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>   	INIT_KFIFO(vm->faults);
> +	INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
>   
>   	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);
>   	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 298f604b8e5f..ba374c2c61bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -343,6 +343,9 @@ struct amdgpu_vm {
>   	uint64_t		*tlb_seq_cpu_addr;
>   	uint64_t		tlb_fence_context;
>   
> +	/* temporary storage of PT BOs until the TLB flush */
> +	struct list_head	tlb_flush_waitlist;
> +
>   	atomic64_t		kfd_last_flushed_seq;
>   
>   	/* How many times we had to re-generate the page tables */
> @@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>   			  uint64_t start, uint64_t end,
>   			  uint64_t dst, uint64_t flags);
>   void amdgpu_vm_pt_free_work(struct work_struct *work);
> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
>   
>   #if defined(CONFIG_DEBUG_FS)
>   void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 95dc0afdaffb..cb14e5686c0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
>    *
>    * @entry: PDE to free
>    */
> -static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>   {
>   	struct amdgpu_bo *shadow;
>   
> @@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
>    * @vm: amdgpu vm structure
>    * @start: optional cursor where to start freeing PDs/PTs
>    * @unlocked: vm resv unlock status
> + * @force: force free all PDs/PTs without waiting for TLB flush
>    *
>    * Free the page directory or page table level and all sub levels.
>    */
>   static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>   				  struct amdgpu_vm *vm,
>   				  struct amdgpu_vm_pt_cursor *start,
> -				  bool unlocked)
> +				  bool unlocked,
> +				  bool force)
>   {
>   	struct amdgpu_vm_pt_cursor cursor;
>   	struct amdgpu_vm_bo_base *entry;
> @@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>   		return;
>   	}
>   
> -	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> -		amdgpu_vm_pt_free(entry);
> +	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
> +		if (!force)
> +			list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
> +		else
> +			amdgpu_vm_pt_free(entry);
> +	}
>   
>   	if (start)
> -		amdgpu_vm_pt_free(start->entry);
> +		list_move(&start->entry->vm_status, &vm->tlb_flush_waitlist);
>   }

The complexity inside amdgpu_vm_pt_free_dfs() really starts to make no 
sense any more.

First of all ditch the usage in amdgpu_vm_pt_free_root(). Instead just 
inline the call to for_each_amdgpu_vm_pt_dfs_safe() to free up all of 
the page tables immediately.

Then the other use case in amdgpu_vm_ptes_update():

/* Make sure previous mapping is freed */
if (cursor.entry->bo) {
     params->table_freed = true;
     amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor, params->unlocked);
}

We should basically change params->table_freed into a list_head and put 
all to be freed entries on there. Something like that:

spin_lock(&vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
     list_move(&entry->vm_status, &params->table_freed);
spin_unlock(&vm->status_lock);

And then finally in amdgpu_vm_update_range() we should call 
amdgpu_vm_pt_free_dfs() to really free up all PDs/PTs (probably rename 
the function while at it).

Regards,
Christian.

>   
>   /**
> @@ -724,7 +730,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>    */
>   void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   {
> -	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
> +	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false, true);
>   }
>   
>   /**
> @@ -1059,7 +1065,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>   					params->needs_flush = true;
>   					amdgpu_vm_pt_free_dfs(adev, params->vm,
>   							      &cursor,
> -							      params->unlocked);
> +							      params->unlocked,
> +							      false);
>   				}
>   				amdgpu_vm_pt_next(adev, &cursor);
>   			}