[PATCH 1/4] drm/amdgpu: Implement page table BO fence
Felix Kuehling
felix.kuehling at amd.com
Thu Jun 1 20:12:26 UTC 2023
On 2023-06-01 15:31, Philip Yang wrote:
> Add a pt_fence to the amdgpu_vm structure and implement helper functions. This
> fence will be shared by all page table BOs of the same amdgpu vm.
>
> Suggested-by: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
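For context, here is a rough sketch of how I picture the shared fence being
attached to the page table BOs later in the series. The helper name and the
BOOKKEEP usage are my assumptions, not something this patch adds:

/* Hypothetical helper: attach the VM's shared pt_fence to a page table BO
 * so its memory is not released until the fence signals (i.e. after the
 * TLB flush has completed). Caller must hold the BO's reservation lock.
 */
static int amdgpu_vm_pt_add_fence(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
{
	int r;

	/* Make room for one more fence slot in the reservation object */
	r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
	if (r)
		return r;

	/* BOOKKEEP usage: the fence only delays freeing the BO, it does not
	 * represent GPU reads or writes of its contents.
	 */
	dma_resv_add_fence(bo->tbo.base.resv, vm->pt_fence,
			   DMA_RESV_USAGE_BOOKKEEP);
	return 0;
}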
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 45 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
> 4 files changed, 52 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5af954abd5ba..09c116dfda31 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1499,6 +1499,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
> int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
> enum amd_powergating_state state);
>
> +struct dma_fence *amdgpu_pt_fence_create(void);
> +
> static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
> {
> return amdgpu_gpu_recovery != 0 &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index c694b41f6461..d9bfb0af3a09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -57,6 +57,16 @@ struct amdgpu_fence {
> ktime_t start_timestamp;
> };
>
> +/*
> + * Page table BO fence
> + * Fence used to ensure page table BOs are freed only after the TLB is flushed,
> + * to avoid the hardware accessing corrupted data if the freed BO pages are reused.
> + */
> +struct amdgpu_pt_fence {
> + struct dma_fence base;
> + spinlock_t lock;
> +};
> +
> static struct kmem_cache *amdgpu_fence_slab;
>
> int amdgpu_fence_slab_init(void)
> @@ -79,6 +89,7 @@ void amdgpu_fence_slab_fini(void)
> */
> static const struct dma_fence_ops amdgpu_fence_ops;
> static const struct dma_fence_ops amdgpu_job_fence_ops;
> +static const struct dma_fence_ops amdgpu_pt_fence_ops;
> static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
> {
> struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
> @@ -852,6 +863,40 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
> .release = amdgpu_job_fence_release,
> };
>
> +static atomic_t pt_fence_seq = ATOMIC_INIT(0);
> +
> +struct dma_fence *amdgpu_pt_fence_create(void)
> +{
> + struct amdgpu_pt_fence *fence;
> +
> + fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> + if (!fence)
> + return NULL;
> +
> + spin_lock_init(&fence->lock);
> + dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
> + dma_fence_context_alloc(1), atomic_inc_return(&pt_fence_seq));
Creating a new fence context per fence is probably wrong. I think we
only need one PT fence context per GPU or per spatial partition, or
maybe per VM.
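Something like the following is what I have in mind, assuming a new
vm->pt_fence_context field (u64) that is allocated once in amdgpu_vm_init()
with dma_fence_context_alloc(1), with the VM passed into the create helper
so the context can be reused for every PT fence of that VM:

struct dma_fence *amdgpu_pt_fence_create(struct amdgpu_vm *vm)
{
	struct amdgpu_pt_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	spin_lock_init(&fence->lock);
	/* Reuse the VM's fence context instead of allocating a new one;
	 * the shared sequence counter orders fences within that context.
	 */
	dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
		       vm->pt_fence_context,
		       atomic_inc_return(&pt_fence_seq));

	dma_fence_get(&fence->base);
	return &fence->base;
}

That way each VM gets its own fence timeline instead of a new context per
fence, which should also be easier to reason about when debugging.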
Regards,
Felix
> +
> + dma_fence_get(&fence->base);
> + return &fence->base;
> +}
> +
> +static const char *amdgpu_pt_fence_get_timeline_name(struct dma_fence *f)
> +{
> + return "pt_fence_timeline";
> +}
> +
> +static void amdgpu_pt_fence_release(struct dma_fence *f)
> +{
> + kfree_rcu(f, rcu);
> +}
> +
> +static const struct dma_fence_ops amdgpu_pt_fence_ops = {
> + .get_driver_name = amdgpu_fence_get_driver_name,
> + .get_timeline_name = amdgpu_pt_fence_get_timeline_name,
> + .release = amdgpu_pt_fence_release,
> +};
> +
> /*
> * Fence debugfs
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 37b9d8a8dbec..0219398e625c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2147,6 +2147,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> vm->update_funcs = &amdgpu_vm_sdma_funcs;
>
> vm->last_update = dma_fence_get_stub();
> + vm->pt_fence = amdgpu_pt_fence_create();
> vm->last_unlocked = dma_fence_get_stub();
> vm->last_tlb_flush = dma_fence_get_stub();
> vm->generation = 0;
> @@ -2270,6 +2271,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>
> dma_fence_put(vm->last_update);
> vm->last_update = dma_fence_get_stub();
> + dma_fence_put(vm->pt_fence);
> + vm->pt_fence = amdgpu_pt_fence_create();
> vm->is_compute_context = true;
>
> /* Free the shadow bo for compute VM */
> @@ -2358,6 +2361,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> }
>
> dma_fence_put(vm->last_update);
> + dma_fence_put(vm->pt_fence);
> for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
> amdgpu_vmid_free_reserved(adev, vm, i);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d551fca1780e..9cc729358612 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
> /* contains the page directory */
> struct amdgpu_vm_bo_base root;
> struct dma_fence *last_update;
> + struct dma_fence *pt_fence;
>
> /* Scheduler entities for page table updates */
> struct drm_sched_entity immediate;