[PATCH v3 2/2] drm/amdgpu: unref the bo after job submit
Christian König
christian.koenig at amd.com
Fri Mar 13 14:20:44 UTC 2020
Am 13.03.20 um 15:07 schrieb xinhui pan:
> Otherwise we might free BOs before job has completed.
> We add the fence to BO after commit, so free BOs after that.
I'm not sure if this is necessary, but probably better save than sorry.
Some comments below.
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 38 +++++++++++++++++++-------
> 1 file changed, 28 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 809ca6e8f40f..605a1bb40280 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -942,13 +942,17 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> *
> * @entry: PDE to free
> */
> -static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
> +static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry,
> + struct list_head *head)
> {
> if (entry->base.bo) {
> entry->base.bo->vm_bo = NULL;
> list_del(&entry->base.vm_status);
> - amdgpu_bo_unref(&entry->base.bo->shadow);
> - amdgpu_bo_unref(&entry->base.bo);
> + if (!head) {
> + amdgpu_bo_unref(&entry->base.bo->shadow);
> + amdgpu_bo_unref(&entry->base.bo);
> + } else
> + list_add(&entry->base.vm_status, head);
Instead of adding a parameter make this a new state in the VM. Something
like vm->zombies or something similar.
> }
> kvfree(entry->entries);
> entry->entries = NULL;
> @@ -965,7 +969,8 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
> */
> static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> - struct amdgpu_vm_pt_cursor *start)
> + struct amdgpu_vm_pt_cursor *start,
> + struct list_head *head)
> {
> struct amdgpu_vm_pt_cursor cursor;
> struct amdgpu_vm_pt *entry;
> @@ -973,10 +978,10 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
> vm->bulk_moveable = false;
>
> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> - amdgpu_vm_free_table(entry);
> + amdgpu_vm_free_table(entry, head);
>
> if (start)
> - amdgpu_vm_free_table(start->entry);
> + amdgpu_vm_free_table(start->entry, head);
> }
>
> /**
> @@ -1428,7 +1433,8 @@ static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
> */
> static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
> uint64_t start, uint64_t end,
> - uint64_t dst, uint64_t flags)
> + uint64_t dst, uint64_t flags,
> + struct list_head *head)
> {
> struct amdgpu_device *adev = params->adev;
> struct amdgpu_vm_pt_cursor cursor;
> @@ -1539,7 +1545,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
> * completely covered by the range and so potentially still in use.
> */
> while (cursor.pfn < frag_start) {
> - amdgpu_vm_free_pts(adev, params->vm, &cursor);
> + amdgpu_vm_free_pts(adev, params->vm, &cursor, head);
> amdgpu_vm_pt_next(adev, &cursor);
> }
>
> @@ -1583,6 +1589,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> struct amdgpu_vm_update_params params;
> enum amdgpu_sync_mode sync_mode;
> int r;
> + struct list_head head;
>
> memset(¶ms, 0, sizeof(params));
> params.adev = adev;
> @@ -1590,6 +1597,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> params.direct = direct;
> params.pages_addr = pages_addr;
>
> + INIT_LIST_HEAD(&head);
> +
> /* Implicitly sync to command submissions in the same VM before
> * unmapping. Sync to moving fences before mapping.
> */
> @@ -1617,13 +1626,22 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (r)
> goto error_unlock;
>
> - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags);
> + r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags, &head);
> if (r)
> goto error_unlock;
>
> r = vm->update_funcs->commit(¶ms, fence);
>
> error_unlock:
> + while (!list_empty(&head)) {
> + struct amdgpu_vm_pt *entry = list_first_entry(&head,
> + struct amdgpu_vm_pt, base.vm_status);
> + list_del(&entry->base.vm_status);
> +
> + amdgpu_bo_unref(&entry->base.bo->shadow);
> + amdgpu_bo_unref(&entry->base.bo);
> + }
Maybe put that into a separate function.
> +
> amdgpu_vm_eviction_unlock(vm);
> return r;
> }
> @@ -3118,7 +3136,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
> }
>
> - amdgpu_vm_free_pts(adev, vm, NULL);
> + amdgpu_vm_free_pts(adev, vm, NULL, NULL);
And then also call it here.
Regards,
Christian.
> amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> WARN_ON(vm->root.base.bo);
More information about the amd-gfx
mailing list