[PATCH v4 2/2] drm/amdgpu: sync page table freeing with tlb flush
Christian König
christian.koenig at amd.com
Mon Mar 4 10:05:33 UTC 2024
Am 01.03.24 um 21:17 schrieb Sharma, Shashank:
>
> On 01/03/2024 14:29, Christian König wrote:
>>
>>
>> Am 01.03.24 um 12:07 schrieb Shashank Sharma:
>>> The idea behind this patch is to delay the freeing of PT entry objects
>>> until the TLB flush is done.
>>>
>>> This patch:
>>> - Adds a tlb_flush_waitlist which will keep the objects that need to be
>>> freed after tlb_flush
>>> - Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of
>>> freeing
>>> them immediately.
>>> - Exports function amdgpu_vm_pt_free to be called dircetly.
>>> - Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
>>> between immediate freeing of the BOs (like from
>>> amdgpu_vm_pt_free_root) vs delayed freeing.
>>>
>>> V2: rebase
>>> V4: (Christian)
>>> - add only locked PTEs entries in TLB flush waitlist.
>>> - do not create a separate function for list flush.
>>> - do not create a new lock for TLB flush.
>>> - there is no need to wait on tlb_flush_fence exclusively.
>>>
>>> Cc: Christian König <Christian.Koenig at amd.com>
>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>> Cc: Felix Kuehling <felix.kuehling at amd.com>
>>> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++-------
>>> 3 files changed, 28 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 310aae6fb49b..94581a1fe34f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>> /* Prepare a TLB flush fence to be attached to PTs */
>>> if (!unlocked && params.needs_flush && vm->is_compute_context) {
>>> + struct amdgpu_vm_bo_base *entry, *next;
>>> +
>>> amdgpu_vm_tlb_fence_create(adev, vm, fence);
>>> /* Makes sure no PD/PT is freed before the flush */
>>> dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
>>> DMA_RESV_USAGE_BOOKKEEP);
>>> +
>>> + if (list_empty(&vm->tlb_flush_waitlist))
>>> + goto error_unlock;
>>> +
>>> + /* Now actually free the waitlist */
>>> + list_for_each_entry_safe(entry, next,
>>> &vm->tlb_flush_waitlist, vm_status)
>>> + amdgpu_vm_pt_free(entry);
>>
>> This needs to be outside of the "if". We also need to free the
>> PDs/PTs in non compute VMs.
>
>
> Noted,
>
>>
>>> }
>>> error_unlock:
>>> @@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>> INIT_LIST_HEAD(&vm->pt_freed);
>>> INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>>> INIT_KFIFO(vm->faults);
>>> + INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
>>> r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va,
>>> &vm->tlb_seq_cpu_addr);
>>> if (r)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 298f604b8e5f..ba374c2c61bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -343,6 +343,9 @@ struct amdgpu_vm {
>>> uint64_t *tlb_seq_cpu_addr;
>>> uint64_t tlb_fence_context;
>>> + /* temporary storage of PT BOs until the TLB flush */
>>> + struct list_head tlb_flush_waitlist;
>>> +
>>> atomic64_t kfd_last_flushed_seq;
>>> /* How many times we had to re-generate the page tables */
>>> @@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct
>>> amdgpu_vm_update_params *params,
>>> uint64_t start, uint64_t end,
>>> uint64_t dst, uint64_t flags);
>>> void amdgpu_vm_pt_free_work(struct work_struct *work);
>>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
>>> #if defined(CONFIG_DEBUG_FS)
>>> void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct
>>> seq_file *m);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> index 95dc0afdaffb..cb14e5686c0f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> @@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct
>>> amdgpu_device *adev,
>>> *
>>> * @entry: PDE to free
>>> */
>>> -static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>>> {
>>> struct amdgpu_bo *shadow;
>>> @@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct
>>> work_struct *work)
>>> * @vm: amdgpu vm structure
>>> * @start: optional cursor where to start freeing PDs/PTs
>>> * @unlocked: vm resv unlock status
>>> + * @force: force free all PDs/PTs without waiting for TLB flush
>>> *
>>> * Free the page directory or page table level and all sub levels.
>>> */
>>> static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>> struct amdgpu_vm_pt_cursor *start,
>>> - bool unlocked)
>>> + bool unlocked,
>>> + bool force)
>>> {
>>> struct amdgpu_vm_pt_cursor cursor;
>>> struct amdgpu_vm_bo_base *entry;
>>> @@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct
>>> amdgpu_device *adev,
>>> return;
>>> }
>>> - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>>> - amdgpu_vm_pt_free(entry);
>>> + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
>>> + if (!force)
>>> + list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
>>> + else
>>> + amdgpu_vm_pt_free(entry);
>>> + }
>>> if (start)
>>> - amdgpu_vm_pt_free(start->entry);
>>> + list_move(&start->entry->vm_status, &vm->tlb_flush_waitlist);
>>> }
>>
>> The complexity inside amdgpu_vm_pt_free_dfs() really starts to make
>> no sense any more.
>>
>> First of all ditch the usage in amdgpu_vm_pt_free_root(). Instead
>> just inline the call to for_each_amdgpu_vm_pt_dfs_safe() to free up
>> all of the page tables immediately.
>
>
> Noted,
>
>>
>> Then the other use case in amdgpu_vm_ptes_update():
>>
>> /* Make sure previous mapping is freed */
>> if (cursor.entry->bo) {
>> params->table_freed = true;
>> amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor, params->unlocked);
>> }
>>
>> We should basically change params->table_freed into a list_head and
>> put all to be freed entries on there. Something like that:
>>
>> spin_lock(&vm->status_lock);
>> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>> list_move(&entry->vm_status, ¶ms->table_freed);
>> spin_unlock(&vm->status_lock);
>>
>> And then finally in amdgpu_vm_update_range() we should call
>> amdgpu_vm_pt_free_dfs() to really free up all PDs/PTs (probably
>> rename the function while at it).
>
> - in amdgpu_vm_pt_free_dfs, there are two different lists for locked
> and unlocked PT entries
>
> unlocked is saved in vm->pt_freed
>
> locked is freed immediately, which we now want to add into another
> list (params->table_freed in your suggestion) and free after tlb_flush
>
> which means we have to do something like this:
>
> in amdgpu_vm_ptes_update {
>
> /* Make sure previous mapping is freed */
> if (cursor.entry->bo) {
>
> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>
> if (unlocked) {
>
> list_move(&entry->vm_status, &vm->pt_free);
>
> else
> list_move(&entry->vm_status, ¶ms->table_freed);
No, here just move everything to params->tables_freed as necessary.
>
> }
>
> }
>
> and then we have to change the amdgpu_vm_pt_free_dfs() implementation
> to simply free BOs, not searching anything like:
>
> if (unlocked) {
And then do here a list_splice() to move everything from tables_freed to
pt_free.
>
> schedule_work(pt_free_work)
>
> } else {
>
> list_for_each(params->table_freed)
>
> amdgpu_vm_pt_free(entry)
>
> }
>
> and then we would be able to call amdgpu_vm_pt_free_dfs() from
> amdgpu_vm_update_range().
>
> Does that sound good to you ?
Very close, I just want to avoid any extra unlocked handling in the low
level PTE update function.
Essentially we only record which PDs/PTs to free in there because we
have a cursor at the right location in that moment.
The state machine itself is something I try to keep out of the PTE
update code.
Regards,
Christian.
>
> - Shashank
>
>
>> Regards,
>> Christian.
>>
>>> /**
>>> @@ -724,7 +730,7 @@ static void amdgpu_vm_pt_free_dfs(struct
>>> amdgpu_device *adev,
>>> */
>>> void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct
>>> amdgpu_vm *vm)
>>> {
>>> - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
>>> + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false, true);
>>> }
>>> /**
>>> @@ -1059,7 +1065,8 @@ int amdgpu_vm_ptes_update(struct
>>> amdgpu_vm_update_params *params,
>>> params->needs_flush = true;
>>> amdgpu_vm_pt_free_dfs(adev, params->vm,
>>> &cursor,
>>> - params->unlocked);
>>> + params->unlocked,
>>> + false);
>>> }
>>> amdgpu_vm_pt_next(adev, &cursor);
>>> }
>>
More information about the amd-gfx
mailing list