[RFC PATCH v6] drm/amdgpu: Remove kfd eviction fence before release bo
Felix Kuehling
felix.kuehling at amd.com
Wed Feb 19 19:06:28 UTC 2020
On 2020-02-19 7:46, xinhui pan wrote:
> No need to trigger eviction as the memory mapping will not be used
> anymore.
>
> All pt/pd bos share the same resv, hence the same shared eviction fence.
> Every time a page table is freed, the fence will be signaled and that causes
> unexpected kfd evictions.
>
> CC: Christian König <christian.koenig at amd.com>
> CC: Felix Kuehling <felix.kuehling at amd.com>
> CC: Alex Deucher <alexander.deucher at amd.com>
> Acked-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> change from v5:
> use trylock instead, and add warn_on_once to give a limited warning.
>
> change from v4:
> based on new ttm code.
>
> change from v3:
> fix a coding error
>
> change from v2:
> based on Chris' drm/ttm: rework BO delayed delete patchset.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 +++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++
> 3 files changed, 44 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 9e8db702d878..0ee8aae6c519 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> struct mm_struct *mm);
> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
>
> struct amdkfd_process_info {
> /* List head of all VMs that belong to a KFD process */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index ef721cb65868..898851bec377 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -276,6 +276,42 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
> return 0;
> }
>
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
> +{
> + struct amdgpu_bo *root = bo;
> + struct amdgpu_vm_bo_base *vm_bo;
> + struct amdgpu_vm *vm;
> + struct amdkfd_process_info *info;
> + struct amdgpu_amdkfd_fence *ef;
> + int ret;
> +
> + /* we can always get vm_bo from root PD bo.*/
> + while (root->parent)
> + root = root->parent;
> +
> + vm_bo = root->vm_bo;
> + if (!vm_bo)
> + return 0;
> +
> + vm = vm_bo->vm;
> + if (!vm)
> + return 0;
> +
> + info = vm->process_info;
> + if (!info || !info->eviction_fence)
> + return 0;
> +
> + ef = container_of(dma_fence_get(&info->eviction_fence->base),
> + struct amdgpu_amdkfd_fence, base);
> +
> + BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
> + ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
> + dma_resv_unlock(bo->tbo.base.resv);
> +
> + dma_fence_put(&ef->base);
> + return ret;
> +}
> +
> static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
> bool wait)
> {
> @@ -1045,6 +1081,8 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> list_del(&vm->vm_list_node);
> mutex_unlock(&process_info->lock);
>
> + vm->process_info = NULL;
> +
> /* Release per-process resources when last compute VM is destroyed */
> if (!process_info->n_vms) {
> WARN_ON(!list_empty(&process_info->kfd_bo_list));
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6f60a581e3ba..5766d20f29d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1307,6 +1307,11 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (abo->kfd_bo)
> amdgpu_amdkfd_unreserve_memory_limit(abo);
>
> + /* We only remove the fence if the resv has individualized. */
> + WARN_ON_ONCE(bo->base.resv != &bo->base._resv);
> + if (bo->base.resv == &bo->base._resv)
> + amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
> +
> if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
> !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
> return;
More information about the amd-gfx
mailing list