[PATCH v5 1/2] drm/amdgpu: implement TLB flush fence
Bhardwaj, Rajneesh
rajneesh.bhardwaj at amd.com
Mon Mar 11 19:48:35 UTC 2024
Acked-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
On 3/11/2024 10:37 AM, Sharma, Shashank wrote:
>
> On 07/03/2024 20:22, Philip Yang wrote:
>>
>>
>> On 2024-03-06 09:41, Shashank Sharma wrote:
>>> From: Christian König<christian.koenig at amd.com>
>>>
>>> The problem is that when (for example) 4k pages are replaced
>>> with a single 2M page we need to wait for change to be flushed
>>> out by invalidating the TLB before the PT can be freed.
>>>
>>> Solve this by moving the TLB flush into a DMA-fence object which
>>> can be used to delay the freeing of the PT BOs until it is signaled.
>>>
>>> V2: (Shashank)
>>> - rebase
>>> - set dma_fence_error only in case of error
>>> - add tlb_flush fence only when PT/PD BO is locked (Felix)
>>> - use vm->pasid when f is NULL (Mukul)
>>>
>>> V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
>>> - move the misplaced fence_create call to the end (Philip)
>>>
>>> V5: - free the f->dependency properly (Christian)
>>>
>>> Cc: Christian Koenig<christian.koenig at amd.com>
>>> Cc: Felix Kuehling<Felix.Kuehling at amd.com>
>>> Cc: Rajneesh Bhardwaj<rajneesh.bhardwaj at amd.com>
>>> Cc: Alex Deucher<alexander.deucher at amd.com>
>>> Reviewed-by: Shashank Sharma<shashank.sharma at amd.com>
>>> Signed-off-by: Christian König<christian.koenig at amd.com>
>>> Signed-off-by: Shashank Sharma<shashank.sharma at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/Makefile | 3 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +
>>> .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 112
>>> ++++++++++++++++++
>>> 4 files changed, 128 insertions(+), 1 deletion(-)
>>> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index fa26a4e3a99d..91ab4cf29b5b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
>>> amdgpu_kms.o \
>>> amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
>>> atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
>>> atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>> - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o
>>> amdgpu_pll.o \
>>> + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o
>>> amdgpu_vm_tlb_fence.o \
>>> + amdgpu_ib.o amdgpu_pll.o \
>>> amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>>> amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o
>>> amdgpu_virt.o \
>>> amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 0960e0a665d3..310aae6fb49b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -988,6 +988,15 @@ int amdgpu_vm_update_range(struct amdgpu_device
>>> *adev, struct amdgpu_vm *vm,
>>> r = vm->update_funcs->commit(¶ms, fence);
>>> + /* Prepare a TLB flush fence to be attached to PTs */
>>> + if (!unlocked && params.needs_flush && vm->is_compute_context) {
>>> + amdgpu_vm_tlb_fence_create(adev, vm, fence);
>>> +
>>> + /* Makes sure no PD/PT is freed before the flush */
>>> + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
>>> + DMA_RESV_USAGE_BOOKKEEP);
>>> + }
>>> +
>>> error_unlock:
>>> amdgpu_vm_eviction_unlock(vm);
>>> drm_dev_exit(idx);
>>> @@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm,
>>> mutex_init(&vm->eviction_lock);
>>> vm->evicting = false;
>>> + vm->tlb_fence_context = dma_fence_context_alloc(1);
>>> r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
>>> false, &root, xcp_id);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 64b3f69efa57..298f604b8e5f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -341,6 +341,7 @@ struct amdgpu_vm {
>>> atomic64_t tlb_seq;
>>> uint64_t tlb_seq_va;
>>> uint64_t *tlb_seq_cpu_addr;
>>> + uint64_t tlb_fence_context;
>>> atomic64_t kfd_last_flushed_seq;
>>> @@ -594,5 +595,8 @@ void amdgpu_vm_update_fault_cache(struct
>>> amdgpu_device *adev,
>>> uint64_t addr,
>>> uint32_t status,
>>> unsigned int vmhub);
>>> +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
>>> + struct amdgpu_vm *vm,
>>> + struct dma_fence **fence);
>>> #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
>>> new file mode 100644
>>> index 000000000000..51cddfa3f1e8
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
>>> @@ -0,0 +1,112 @@
>>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>>> +/*
>>> + * Copyright 2023 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to
>>> whom the
>>> + * Software is furnished to do so, subject to the following
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include <linux/dma-fence.h>
>>> +#include <linux/workqueue.h>
>>> +
>>> +#include "amdgpu.h"
>>> +#include "amdgpu_vm.h"
>>> +#include "amdgpu_gmc.h"
>>> +
>>> +struct amdgpu_tlb_fence {
>>> + struct dma_fence base;
>>> + struct amdgpu_device *adev;
>>> + struct dma_fence *dependency;
>>> + struct work_struct work;
>>> + spinlock_t lock;
>>> + uint16_t pasid;
>>> +
>>> +};
>>> +
>>> +static const char *amdgpu_tlb_fence_get_driver_name(struct
>>> dma_fence *fence)
>>> +{
>>> + return "amdgpu tlb fence";
>>> +}
>>> +
>>> +static const char *amdgpu_tlb_fence_get_timeline_name(struct
>>> dma_fence *f)
>>> +{
>>> + return "amdgpu tlb timeline";
>>> +}
>>> +
>>> +static void amdgpu_tlb_fence_work(struct work_struct *work)
>>> +{
>>> + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
>>> + int r;
>>> +
>>> + if (f->dependency) {
>>> + dma_fence_wait(f->dependency, false);
>>> + dma_fence_put(f->dependency);
>>> + f->dependency = NULL;
>>> + }
>>> +
>>> + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
>>
>> To flush all XCCs, as this is a corner case, we could start with this
>> to make it correct for SPX mode for now, with extra flush for other
>> modes.
>>
>> int num_xcc = f->adev->gfx.xcc_mask ?
>> NUM_XCC(f->adev->gfx.xcc_mask) : 1;
>> uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
>> int i;
>>
>> for_each_inst(i, xcc_mask)
>> r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid,
>> TLB_FLUSH_LEGACY, true, i);
>
> Thanks for this input, Philip.
>
> @Christian, your feedback for this ?
>
> - Shashank
>
>>
>> Regards,
>> Philip
>>> + if (r) {
>>> + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
>>> + f->pasid);
>>> + dma_fence_set_error(&f->base, r);
>>> + }
>>> +
>>> + dma_fence_signal(&f->base);
>>> + dma_fence_put(&f->base);
>>> +}
>>> +
>>> +static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
>>> + .use_64bit_seqno = true,
>>> + .get_driver_name = amdgpu_tlb_fence_get_driver_name,
>>> + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
>>> +};
>>> +
>>> +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct
>>> amdgpu_vm *vm,
>>> + struct dma_fence **fence)
>>> +{
>>> + struct amdgpu_tlb_fence *f;
>>> +
>>> + f = kmalloc(sizeof(*f), GFP_KERNEL);
>>> + if (!f) {
>>> + /*
>>> + * We can't fail since the PDEs and PTEs are already
>>> updated, so
>>> + * just block for the dependency and execute the TLB flush
>>> + */
>>> + if (*fence)
>>> + dma_fence_wait(*fence, false);
>>> +
>>> + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
>>> + *fence = dma_fence_get_stub();
>>> + return;
>>> + }
>>> +
>>> + f->adev = adev;
>>> + f->dependency = *fence;
>>> + f->pasid = vm->pasid;
>>> + INIT_WORK(&f->work, amdgpu_tlb_fence_work);
>>> + spin_lock_init(&f->lock);
>>> +
>>> + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
>>> + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
>>> +
>>> + /* TODO: We probably need a separate wq here */
>>> + dma_fence_get(&f->base);
>>> + schedule_work(&f->work);
>>> +
>>> + *fence = &f->base;
>>> +}
More information about the amd-gfx
mailing list