[PATCH 1/4] drm/amdgpu: Implement page table BO fence

Thu Jun 1 20:12:26 UTC 2023

On 2023-06-01 15:31, Philip Yang wrote:
> Add pt_fence to amdgpu vm structure and implement helper functions. This
> fence will be shared by all page table BOs of the same amdgpu vm.
>
> Suggested-by: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 45 +++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  1 +
>   4 files changed, 52 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5af954abd5ba..09c116dfda31 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1499,6 +1499,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
>   int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
>   			       enum amd_powergating_state state);
>   
> +struct dma_fence *amdgpu_pt_fence_create(void);
> +
>   static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
>   {
>   	return amdgpu_gpu_recovery != 0 &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index c694b41f6461..d9bfb0af3a09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -57,6 +57,16 @@ struct amdgpu_fence {
>   	ktime_t				start_timestamp;
>   };
>   
> +/*
> + * Page table BO fence
> + * Fence used to ensure page table BOs are freed after TLB is flushed, to avoid
> + * H/W access corrupted data if the freed BO page is reused.
> + */
> +struct amdgpu_pt_fence {
> +	struct dma_fence base;
> +	spinlock_t lock;
> +};
> +
>   static struct kmem_cache *amdgpu_fence_slab;
>   
>   int amdgpu_fence_slab_init(void)
> @@ -79,6 +89,7 @@ void amdgpu_fence_slab_fini(void)
>    */
>   static const struct dma_fence_ops amdgpu_fence_ops;
>   static const struct dma_fence_ops amdgpu_job_fence_ops;
> +static const struct dma_fence_ops amdgpu_pt_fence_ops;
>   static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
>   {
>   	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
> @@ -852,6 +863,40 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
>   	.release = amdgpu_job_fence_release,
>   };
>   
> +static atomic_t pt_fence_seq = ATOMIC_INIT(0);
> +
> +struct dma_fence *amdgpu_pt_fence_create(void)
> +{
> +	struct amdgpu_pt_fence *fence;
> +
> +	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +	if (!fence)
> +		return NULL;
> +
> +	spin_lock_init(&fence->lock);
> +	dma_fence_init(&fence->base, &amdgpu_pt_fence_ops, &fence->lock,
> +		       dma_fence_context_alloc(1), atomic_inc_return(&pt_fence_seq));

Creating a new fence context per fence is probably wrong. I think we 
only need one PT fence context per GPU or per spatial partition, or 
maybe per VM.

Regards,
   Felix

> +
> +	dma_fence_get(&fence->base);
> +	return &fence->base;
> +}
> +
> +static const char *amdgpu_pt_fence_get_timeline_name(struct dma_fence *f)
> +{
> +	return "pt_fence_timeline";
> +}
> +
> +static void amdgpu_pt_fence_release(struct dma_fence *f)
> +{
> +	kfree_rcu(f, rcu);
> +}
> +
> +static const struct dma_fence_ops amdgpu_pt_fence_ops = {
> +	.get_driver_name = amdgpu_fence_get_driver_name,
> +	.get_timeline_name = amdgpu_pt_fence_get_timeline_name,
> +	.release = amdgpu_pt_fence_release,
> +};
> +
>   /*
>    * Fence debugfs
>    */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 37b9d8a8dbec..0219398e625c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2147,6 +2147,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   		vm->update_funcs = &amdgpu_vm_sdma_funcs;
>   
>   	vm->last_update = dma_fence_get_stub();
> +	vm->pt_fence = amdgpu_pt_fence_create();
>   	vm->last_unlocked = dma_fence_get_stub();
>   	vm->last_tlb_flush = dma_fence_get_stub();
>   	vm->generation = 0;
> @@ -2270,6 +2271,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	dma_fence_put(vm->last_update);
>   	vm->last_update = dma_fence_get_stub();
> +	dma_fence_put(vm->pt_fence);
> +	vm->pt_fence = amdgpu_pt_fence_create();
>   	vm->is_compute_context = true;
>   
>   	/* Free the shadow bo for compute VM */
> @@ -2358,6 +2361,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	}
>   
>   	dma_fence_put(vm->last_update);
> +	dma_fence_put(vm->pt_fence);
>   	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
>   		amdgpu_vmid_free_reserved(adev, vm, i);
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d551fca1780e..9cc729358612 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>   	/* contains the page directory */
>   	struct amdgpu_vm_bo_base     root;
>   	struct dma_fence	*last_update;
> +	struct dma_fence	*pt_fence;
>   
>   	/* Scheduler entities for page table updates */
>   	struct drm_sched_entity	immediate;