[PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush
Sharma, Shashank
shashank.sharma at amd.com
Fri Mar 1 20:17:26 UTC 2024
On 01/03/2024 14:29, Christian König wrote:
>
>
> Am 01.03.24 um 12:07 schrieb Shashank Sharma:
>> The idea behind this patch is to delay the freeing of PT entry objects
>> until the TLB flush is done.
>>
>> This patch:
>> - Adds a tlb_flush_waitlist which will keep the objects that need to be
>> freed after tlb_flush
>> - Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of
>>   freeing them immediately.
>> - Exports function amdgpu_vm_pt_free to be called directly.
>> - Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
>> between immediate freeing of the BOs (like from
>> amdgpu_vm_pt_free_root) vs delayed freeing.
>>
>> V2: rebase
>> V4: (Christian)
>> - add only locked PTEs entries in TLB flush waitlist.
>> - do not create a separate function for list flush.
>> - do not create a new lock for TLB flush.
>> - there is no need to wait on tlb_flush_fence exclusively.
>>
>> Cc: Christian König <Christian.Koenig at amd.com>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Felix Kuehling <felix.kuehling at amd.com>
>> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++-------
>> 3 files changed, 28 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 310aae6fb49b..94581a1fe34f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>> /* Prepare a TLB flush fence to be attached to PTs */
>> if (!unlocked && params.needs_flush && vm->is_compute_context) {
>> + struct amdgpu_vm_bo_base *entry, *next;
>> +
>> amdgpu_vm_tlb_fence_create(adev, vm, fence);
>> /* Makes sure no PD/PT is freed before the flush */
>> dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
>> DMA_RESV_USAGE_BOOKKEEP);
>> +
>> + if (list_empty(&vm->tlb_flush_waitlist))
>> + goto error_unlock;
>> +
>> + /* Now actually free the waitlist */
>> + list_for_each_entry_safe(entry, next,
>> &vm->tlb_flush_waitlist, vm_status)
>> + amdgpu_vm_pt_free(entry);
>
> This needs to be outside of the "if". We also need to free the PDs/PTs
> in non compute VMs.
Noted,
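Just to double-check I understood this correctly, the free loop would then
sit after the compute-only fence block, roughly like this (quick sketch,
variable declarations omitted for brevity):

    /* Prepare a TLB flush fence to be attached to PTs */
    if (!unlocked && params.needs_flush && vm->is_compute_context) {
        amdgpu_vm_tlb_fence_create(adev, vm, fence);

        /* Makes sure no PD/PT is freed before the flush */
        dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
                           DMA_RESV_USAGE_BOOKKEEP);
    }

    /* Free the queued PDs/PTs for compute and non-compute VMs alike */
    list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
        amdgpu_vm_pt_free(entry);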
>
>> }
>> error_unlock:
>> @@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>> INIT_LIST_HEAD(&vm->pt_freed);
>> INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>> INIT_KFIFO(vm->faults);
>> + INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
>> r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va,
>> &vm->tlb_seq_cpu_addr);
>> if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 298f604b8e5f..ba374c2c61bd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -343,6 +343,9 @@ struct amdgpu_vm {
>> uint64_t *tlb_seq_cpu_addr;
>> uint64_t tlb_fence_context;
>> + /* temporary storage of PT BOs until the TLB flush */
>> + struct list_head tlb_flush_waitlist;
>> +
>> atomic64_t kfd_last_flushed_seq;
>> /* How many times we had to re-generate the page tables */
>> @@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct
>> amdgpu_vm_update_params *params,
>> uint64_t start, uint64_t end,
>> uint64_t dst, uint64_t flags);
>> void amdgpu_vm_pt_free_work(struct work_struct *work);
>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
>> #if defined(CONFIG_DEBUG_FS)
>> void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct
>> seq_file *m);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> index 95dc0afdaffb..cb14e5686c0f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>> @@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct
>> amdgpu_device *adev,
>> *
>> * @entry: PDE to free
>> */
>> -static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>> {
>> struct amdgpu_bo *shadow;
>> @@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct
>> work_struct *work)
>> * @vm: amdgpu vm structure
>> * @start: optional cursor where to start freeing PDs/PTs
>> * @unlocked: vm resv unlock status
>> + * @force: force free all PDs/PTs without waiting for TLB flush
>> *
>> * Free the page directory or page table level and all sub levels.
>> */
>> static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm,
>> struct amdgpu_vm_pt_cursor *start,
>> - bool unlocked)
>> + bool unlocked,
>> + bool force)
>> {
>> struct amdgpu_vm_pt_cursor cursor;
>> struct amdgpu_vm_bo_base *entry;
>> @@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct
>> amdgpu_device *adev,
>> return;
>> }
>> - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>> - amdgpu_vm_pt_free(entry);
>> + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
>> + if (!force)
>> + list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
>> + else
>> + amdgpu_vm_pt_free(entry);
>> + }
>> if (start)
>> - amdgpu_vm_pt_free(start->entry);
>> + list_move(&start->entry->vm_status, &vm->tlb_flush_waitlist);
>> }
>
> The complexity inside amdgpu_vm_pt_free_dfs() really starts to make no
> sense any more.
>
> First of all ditch the usage in amdgpu_vm_pt_free_root(). Instead just
> inline the call to for_each_amdgpu_vm_pt_dfs_safe() to free up all of
> the page tables immediately.
Noted,
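So amdgpu_vm_pt_free_root() would just do the DFS walk itself, something
like this (untested sketch):

void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
    struct amdgpu_vm_pt_cursor cursor;
    struct amdgpu_vm_bo_base *entry;

    /* VM teardown: free all PDs/PTs immediately, no TLB flush to wait for */
    for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
        amdgpu_vm_pt_free(entry);
}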
>
> Then the other use case in amdgpu_vm_ptes_update():
>
> /* Make sure previous mapping is freed */
> if (cursor.entry->bo) {
> params->table_freed = true;
> amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor, params->unlocked);
> }
>
> We should basically change params->table_freed into a list_head and
> put all to be freed entries on there. Something like that:
>
> spin_lock(&vm->status_lock);
> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> list_move(&entry->vm_status, &params->table_freed);
> spin_unlock(&vm->status_lock);
>
> And then finally in amdgpu_vm_update_range() we should call
> amdgpu_vm_pt_free_dfs() to really free up all PDs/PTs (probably rename
> the function while at it).
- In amdgpu_vm_pt_free_dfs() there are two different lists for locked and
unlocked PT entries: unlocked entries are saved in vm->pt_freed and freed
from the worker, while locked entries are freed immediately. The locked
ones are what we now want to add to another list (params->table_freed in
your suggestion) and free only after the TLB flush.
Which means we would have to do something like this:
in amdgpu_vm_ptes_update() {
    /* Make sure previous mapping is freed */
    if (cursor.entry->bo) {
        for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
            if (unlocked)
                list_move(&entry->vm_status, &vm->pt_freed);
            else
                list_move(&entry->vm_status, &params->table_freed);
        }
    }
}
and then we would have to change the amdgpu_vm_pt_free_dfs() implementation
so that it just frees the queued BOs instead of walking the page tables,
something like:

if (unlocked) {
    schedule_work(&vm->pt_free_work);
} else {
    list_for_each_entry_safe(entry, next, &params->table_freed, vm_status)
        amdgpu_vm_pt_free(entry);
}
and then we would be able to call amdgpu_vm_pt_free_dfs() from
amdgpu_vm_update_range().
Does that sound good to you?
- Shashank
> Regards,
> Christian.
>
>> /**
>> @@ -724,7 +730,7 @@ static void amdgpu_vm_pt_free_dfs(struct
>> amdgpu_device *adev,
>> */
>> void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct
>> amdgpu_vm *vm)
>> {
>> - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
>> + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false, true);
>> }
>> /**
>> @@ -1059,7 +1065,8 @@ int amdgpu_vm_ptes_update(struct
>> amdgpu_vm_update_params *params,
>> params->needs_flush = true;
>> amdgpu_vm_pt_free_dfs(adev, params->vm,
>> &cursor,
>> - params->unlocked);
>> + params->unlocked,
>> + false);
>> }
>> amdgpu_vm_pt_next(adev, &cursor);
>> }
>