[PATCH v4 2/2] drm/amdgpu: unref pt bo after job submit

Felix Kuehling felix.kuehling at amd.com
Fri Mar 13 17:36:23 UTC 2020


This seems weird. This means that we update a page table, and then free 
it in the same amdgpu_vm_update_ptes call? That means the update is 
redundant. Can we eliminate the redundant PTE update if the page table 
is about to be freed anyway?

Regards,
   Felix

On 2020-03-13 12:09, xinhui pan wrote:
> Free page table bo before job submit is insane.
> We might touch invalid memory while job is runnig.
>
> we now have individualized bo resv during bo releasing.
> So any fences added to root PT bo is actually untested when
> a normal PT bo is releasing.
>
> We might hit gmc page fault or memory just got overwrited.
>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 +++++++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
>   2 files changed, 24 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 73398831196f..346e2f753474 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -937,6 +937,21 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   	return r;
>   }
>   
> +static void amdgpu_vm_free_zombie_bo(struct amdgpu_device *adev,
> +		struct amdgpu_vm *vm)
> +{
> +	struct amdgpu_vm_pt *entry;
> +
> +	while (!list_empty(&vm->zombies)) {
> +		entry = list_first_entry(&vm->zombies, struct amdgpu_vm_pt,
> +				base.vm_status);
> +		list_del(&entry->base.vm_status);
> +
> +		amdgpu_bo_unref(&entry->base.bo->shadow);
> +		amdgpu_bo_unref(&entry->base.bo);
> +	}
> +}
> +
>   /**
>    * amdgpu_vm_free_table - fre one PD/PT
>    *
> @@ -945,10 +960,9 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
>   {
>   	if (entry->base.bo) {
> +		list_move(&entry->base.vm_status,
> +				&entry->base.bo->vm_bo->vm->zombies);
>   		entry->base.bo->vm_bo = NULL;
> -		list_del(&entry->base.vm_status);
> -		amdgpu_bo_unref(&entry->base.bo->shadow);
> -		amdgpu_bo_unref(&entry->base.bo);
>   	}
>   	kvfree(entry->entries);
>   	entry->entries = NULL;
> @@ -1624,6 +1638,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, fence);
>   
>   error_unlock:
> +	amdgpu_vm_free_zombie_bo(adev, vm);
>   	amdgpu_vm_eviction_unlock(vm);
>   	return r;
>   }
> @@ -2807,6 +2822,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	INIT_LIST_HEAD(&vm->invalidated);
>   	spin_lock_init(&vm->invalidated_lock);
>   	INIT_LIST_HEAD(&vm->freed);
> +	INIT_LIST_HEAD(&vm->zombies);
>   
>   
>   	/* create scheduler entities for page table updates */
> @@ -3119,6 +3135,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	}
>   
>   	amdgpu_vm_free_pts(adev, vm, NULL);
> +	amdgpu_vm_free_zombie_bo(adev, vm);
> +
>   	amdgpu_bo_unreserve(root);
>   	amdgpu_bo_unref(&root);
>   	WARN_ON(vm->root.base.bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index b5705fcfc935..9baf44fa16f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -269,6 +269,9 @@ struct amdgpu_vm {
>   	/* BO mappings freed, but not yet updated in the PT */
>   	struct list_head	freed;
>   
> +	/* BO will be freed soon */
> +	struct list_head	zombies;
> +
>   	/* contains the page directory */
>   	struct amdgpu_vm_pt     root;
>   	struct dma_fence	*last_update;


More information about the amd-gfx mailing list