[PATCH v4 2/2] drm/amdgpu: unref pt bo after job submit
Felix Kuehling
felix.kuehling at amd.com
Fri Mar 13 17:36:23 UTC 2020
This seems weird. This means that we update a page table, and then free
it in the same amdgpu_vm_update_ptes call? That means the update is
redundant. Can we eliminate the redundant PTE update if the page table
is about to be freed anyway?
Regards,
Felix
On 2020-03-13 12:09, xinhui pan wrote:
> Free page table bo before job submit is insane.
> We might touch invalid memory while job is runnig.
>
> we now have individualized bo resv during bo releasing.
> So any fences added to root PT bo is actually untested when
> a normal PT bo is releasing.
>
> We might hit gmc page fault or memory just got overwrited.
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 +++++++++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
> 2 files changed, 24 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 73398831196f..346e2f753474 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -937,6 +937,21 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> return r;
> }
>
> +static void amdgpu_vm_free_zombie_bo(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm)
> +{
> + struct amdgpu_vm_pt *entry;
> +
> + while (!list_empty(&vm->zombies)) {
> + entry = list_first_entry(&vm->zombies, struct amdgpu_vm_pt,
> + base.vm_status);
> + list_del(&entry->base.vm_status);
> +
> + amdgpu_bo_unref(&entry->base.bo->shadow);
> + amdgpu_bo_unref(&entry->base.bo);
> + }
> +}
> +
> /**
> * amdgpu_vm_free_table - fre one PD/PT
> *
> @@ -945,10 +960,9 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
> {
> if (entry->base.bo) {
> + list_move(&entry->base.vm_status,
> + &entry->base.bo->vm_bo->vm->zombies);
> entry->base.bo->vm_bo = NULL;
> - list_del(&entry->base.vm_status);
> - amdgpu_bo_unref(&entry->base.bo->shadow);
> - amdgpu_bo_unref(&entry->base.bo);
> }
> kvfree(entry->entries);
> entry->entries = NULL;
> @@ -1624,6 +1638,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> r = vm->update_funcs->commit(¶ms, fence);
>
> error_unlock:
> + amdgpu_vm_free_zombie_bo(adev, vm);
> amdgpu_vm_eviction_unlock(vm);
> return r;
> }
> @@ -2807,6 +2822,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> INIT_LIST_HEAD(&vm->invalidated);
> spin_lock_init(&vm->invalidated_lock);
> INIT_LIST_HEAD(&vm->freed);
> + INIT_LIST_HEAD(&vm->zombies);
>
>
> /* create scheduler entities for page table updates */
> @@ -3119,6 +3135,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> }
>
> amdgpu_vm_free_pts(adev, vm, NULL);
> + amdgpu_vm_free_zombie_bo(adev, vm);
> +
> amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> WARN_ON(vm->root.base.bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index b5705fcfc935..9baf44fa16f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -269,6 +269,9 @@ struct amdgpu_vm {
> /* BO mappings freed, but not yet updated in the PT */
> struct list_head freed;
>
> + /* BO will be freed soon */
> + struct list_head zombies;
> +
> /* contains the page directory */
> struct amdgpu_vm_pt root;
> struct dma_fence *last_update;
More information about the amd-gfx
mailing list