[PATCH v4 1/2] drm/amdgpu: implement TLB flush fence
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Mar 1 12:59:08 UTC 2024
Am 01.03.24 um 12:07 schrieb Shashank Sharma:
> From: Christian König <christian.koenig at amd.com>
>
> The problem is that when (for example) 4k pages are replaced
> with a single 2M page we need to wait for change to be flushed
> out by invalidating the TLB before the PT can be freed.
>
> Solve this by moving the TLB flush into a DMA-fence object which
> can be used to delay the freeing of the PT BOs until it is signaled.
>
> V2: (Shashank)
> - rebase
> - set dma_fence_error only in case of error
> - add tlb_flush fence only when PT/PD BO is locked (Felix)
> - use vm->pasid when f is NULL (Mukul)
>
> V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
> - move the misplaced fence_create call to the end (Philip)
>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +
> .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 111 ++++++++++++++++++
> 4 files changed, 127 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index fa26a4e3a99d..91ab4cf29b5b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
> atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
> atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
> - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
> + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
> + amdgpu_ib.o amdgpu_pll.o \
> amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
> amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0960e0a665d3..310aae6fb49b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> r = vm->update_funcs->commit(¶ms, fence);
>
> + /* Prepare a TLB flush fence to be attached to PTs */
> + if (!unlocked && params.needs_flush && vm->is_compute_context) {
> + amdgpu_vm_tlb_fence_create(adev, vm, fence);
> +
> + /* Makes sure no PD/PT is freed before the flush */
> + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
> + DMA_RESV_USAGE_BOOKKEEP);
> + }
> +
> error_unlock:
> amdgpu_vm_eviction_unlock(vm);
> drm_dev_exit(idx);
> @@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> mutex_init(&vm->eviction_lock);
> vm->evicting = false;
> + vm->tlb_fence_context = dma_fence_context_alloc(1);
>
> r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
> false, &root, xcp_id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 64b3f69efa57..298f604b8e5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -341,6 +341,7 @@ struct amdgpu_vm {
> atomic64_t tlb_seq;
> uint64_t tlb_seq_va;
> uint64_t *tlb_seq_cpu_addr;
> + uint64_t tlb_fence_context;
>
> atomic64_t kfd_last_flushed_seq;
>
> @@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
> uint64_t addr,
> uint32_t status,
> unsigned int vmhub);
> +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct dma_fence **fence);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
> new file mode 100644
> index 000000000000..54c33c24fa46
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
> @@ -0,0 +1,111 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include <linux/dma-fence.h>
> +#include <linux/workqueue.h>
> +
> +#include "amdgpu.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_gmc.h"
> +
> +struct amdgpu_tlb_fence {
> + struct dma_fence base;
> + struct amdgpu_device *adev;
> + struct dma_fence *dependency;
> + struct work_struct work;
> + spinlock_t lock;
> + uint16_t pasid;
> +
> +};
> +
> +static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
> +{
> + return "amdgpu tlb fence";
> +}
> +
> +static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
> +{
> + return "amdgpu tlb timeline";
> +}
> +
> +static void amdgpu_tlb_fence_work(struct work_struct *work)
> +{
> + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
> + int r;
> +
> + if (f->dependency) {
> + dma_fence_wait(f->dependency, false);
> + f->dependency = dma_fence_get_stub();
That doesn't correctly drops the f->depency reference.
You need something like "dma_fence_put(f->depency); f->dependency =
NULL;" here instead.
Apart from that looks good to me,
Christian.
> + }
> +
> + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
> + if (r) {
> + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
> + f->pasid);
> + dma_fence_set_error(&f->base, r);
> + }
> +
> + dma_fence_signal(&f->base);
> + dma_fence_put(&f->base);
> +}
> +
> +static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
> + .use_64bit_seqno = true,
> + .get_driver_name = amdgpu_tlb_fence_get_driver_name,
> + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
> +};
> +
> +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct dma_fence **fence)
> +{
> + struct amdgpu_tlb_fence *f;
> +
> + f = kmalloc(sizeof(*f), GFP_KERNEL);
> + if (!f) {
> + /*
> + * We can't fail since the PDEs and PTEs are already updated, so
> + * just block for the dependency and execute the TLB flush
> + */
> + if (*fence)
> + dma_fence_wait(*fence, false);
> +
> + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
> + *fence = dma_fence_get_stub();
> + return;
> + }
> +
> + f->adev = adev;
> + f->dependency = *fence;
> + f->pasid = vm->pasid;
> + INIT_WORK(&f->work, amdgpu_tlb_fence_work);
> + spin_lock_init(&f->lock);
> +
> + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
> + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
> +
> + /* TODO: We probably need a separate wq here */
> + dma_fence_get(&f->base);
> + schedule_work(&f->work);
> +
> + *fence = &f->base;
> +}
More information about the amd-gfx
mailing list