[PATCH v9 2/2] drm/amdgpu: sync page table freeing with tlb flush
Christian König
christian.koenig at amd.com
Tue Mar 19 10:36:17 UTC 2024
Am 18.03.24 um 17:11 schrieb Shashank Sharma:
> The idea behind this patch is to delay the freeing of PT entry objects
> until the TLB flush is done.
>
> This patch:
> - Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
> objects that need to be freed after tlb_flush.
> - Adds PT entries in this list in amdgpu_vm_ptes_update after finding
> the PT entry.
> - Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
> to simply freeing of the BOs, also renames it to
> amdgpu_vm_pt_free_list to reflect this same.
> - Exports function amdgpu_vm_pt_free_list to be called directly.
> - Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.
>
> V2: rebase
> V4: Addressed review comments from Christian
> - add only locked PTEs entries in TLB flush waitlist.
> - do not create a separate function for list flush.
> - do not create a new lock for TLB flush.
> - there is no need to wait on tlb_flush_fence exclusively.
>
> V5: Addressed review comments from Christian
> - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
> of the objects and rename it.
> - add all the PTE objects in params->tlb_flush_waitlist
> - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
> - call amdgpu_vm_pt_free_list directly
>
> V6: Rebase
> V7: Rebase
> V8: Added a NULL check to fix this backtrace issue:
> [ 415.351447] BUG: kernel NULL pointer dereference, address: 0000000000000008
> [ 415.359245] #PF: supervisor write access in kernel mode
> [ 415.365081] #PF: error_code(0x0002) - not-present page
> [ 415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
> [ 415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
> [ 415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: G OE 5.18.2-mi300-build-140423-ubuntu-22.04+ #24
> [ 415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS RMO1001AS 02/21/2024
> [ 415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
> [ 415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 ff <48
>> 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63
> [ 415.430621] RSP: 0018:ffffc9000401f990 EFLAGS: 00010287
> [ 415.436456] RAX: ffff888147bb82f0 RBX: ffff888147bb82d8 RCX: 0000000000000000
> [ 415.444426] RDX: 0000000000000000 RSI: ffffc9000401fa30 RDI: ffff888161f80000
> [ 415.452397] RBP: ffffc9000401fa80 R08: 0000000000000000 R09: ffffc9000401fa00
> [ 415.460368] R10: 00000007f0cc0000 R11: 00000007f0c85000 R12: ffffc9000401fb20
> [ 415.468340] R13: 00000007f0d00000 R14: ffffc9000401faf0 R15: ffff888161f80000
> [ 415.476312] FS: 00007f132ff89840(0000) GS:ffff889f87c00000(0000) knlGS:0000000000000000
> [ 415.485350] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 415.491767] CR2: 0000000000000008 CR3: 0000000161d46003 CR4: 0000000000770ef0
> [ 415.499738] PKRU: 55555554
> [ 415.502750] Call Trace:
> [ 415.505482] <TASK>
> [ 415.507825] amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
> [ 415.513869] amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
> [ 415.519814] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 [amdgpu]
> [ 415.527729] kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
> [ 415.534551] kfd_ioctl+0x3b6/0x510 [amdgpu]
>
> V9: Addressed review comments from Christian
> - No NULL check reqd for root PT freeing
> - Free PT list regardless of needs_flush
> - Move adding BOs in list in a separate function
>
> Cc: Christian König <Christian.Koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Felix Kuehling <felix.kuehling at amd.com>
> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> Acked-by: Felix Kuehling <felix.kuehling at amd.com>
> Acked-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> Tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 7 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 66 +++++++++++++++--------
> 3 files changed, 53 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 104bf600c85f..8fada1152664 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -986,6 +986,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> params.unlocked = unlocked;
> params.needs_flush = flush_tlb;
> params.allow_override = allow_override;
> + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist);
>
> /* Implicitly sync to command submissions in the same VM before
> * unmapping. Sync to moving fences before mapping.
> @@ -1076,6 +1077,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> tlb_cb = NULL;
> }
>
> + amdgpu_vm_pt_free_list(adev, ¶ms);
> +
> error_free:
> kfree(tlb_cb);
> amdgpu_vm_eviction_unlock(vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index b0a4fe683352..54d7da396de0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -266,6 +266,11 @@ struct amdgpu_vm_update_params {
> * to be overridden for NUMA local memory.
> */
> bool allow_override;
> +
> + /**
> + * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
> + */
> + struct list_head tlb_flush_waitlist;
> };
>
> struct amdgpu_vm_update_funcs {
> @@ -547,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> uint64_t start, uint64_t end,
> uint64_t dst, uint64_t flags);
> void amdgpu_vm_pt_free_work(struct work_struct *work);
> +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
> + struct amdgpu_vm_update_params *params);
>
> #if defined(CONFIG_DEBUG_FS)
> void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 601df0ce8290..d904fc96ba0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
> }
>
> /**
> - * amdgpu_vm_pt_free_dfs - free PD/PT levels
> + * amdgpu_vm_pt_free_list - free PD/PT levels
> *
> * @adev: amdgpu device structure
> - * @vm: amdgpu vm structure
> - * @start: optional cursor where to start freeing PDs/PTs
> - * @unlocked: vm resv unlock status
> + * @params: see amdgpu_vm_update_params definition
> *
> - * Free the page directory or page table level and all sub levels.
> + * Free the page directory objects saved in the flush list
> */
> -static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - struct amdgpu_vm_pt_cursor *start,
> - bool unlocked)
> +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
> + struct amdgpu_vm_update_params *params)
> {
> - struct amdgpu_vm_pt_cursor cursor;
> - struct amdgpu_vm_bo_base *entry;
> + struct amdgpu_vm_bo_base *entry, *next;
> + struct amdgpu_vm *vm = params->vm;
> + bool unlocked = params->unlocked;
> +
> + if (list_empty(¶ms->tlb_flush_waitlist))
> + return;
>
> if (unlocked) {
> spin_lock(&vm->status_lock);
> - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> - list_move(&entry->vm_status, &vm->pt_freed);
> -
> - if (start)
> - list_move(&start->entry->vm_status, &vm->pt_freed);
> + list_splice_init(&vm->pt_freed, ¶ms->tlb_flush_waitlist);
> spin_unlock(&vm->status_lock);
> schedule_work(&vm->pt_free_work);
> return;
> }
>
> - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> + list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status)
> amdgpu_vm_pt_free(entry);
> +}
>
> - if (start)
> - amdgpu_vm_pt_free(start->entry);
> +/**
> + * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
> + *
> + * @params: parameters for the update
> + * @cursor: first PT entry to start DF search from, non NULL
> + *
> + * This list will be freed after TLB flush.
> + */
> +static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
> + struct amdgpu_vm_pt_cursor *cursor)
> +{
> + struct amdgpu_vm_pt_cursor seek;
> + struct amdgpu_vm_bo_base *entry;
> +
> + spin_lock(¶ms->vm->status_lock);
> + for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
> + if (entry && entry->bo)
> + list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist);
> + }
> +
> + /* enter start node now */
> + list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist);
> + spin_unlock(¶ms->vm->status_lock);
> }
>
> /**
> @@ -667,7 +685,11 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> */
> void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> {
> - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
> + struct amdgpu_vm_pt_cursor cursor;
> + struct amdgpu_vm_bo_base *entry;
> +
> + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
> + amdgpu_vm_pt_free(entry);
> }
>
> /**
> @@ -973,9 +995,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> /* Make sure previous mapping is freed */
> if (cursor.entry->bo) {
> params->needs_flush = true;
> - amdgpu_vm_pt_free_dfs(adev, params->vm,
> - &cursor,
> - params->unlocked);
> + amdgpu_vm_pt_add_list(params, &cursor);
> }
> amdgpu_vm_pt_next(adev, &cursor);
> }
More information about the amd-gfx
mailing list