[PATCH 2/3] drm/amdgpu: sync to KFD fences before clearing PTEs

Felix Kuehling felix.kuehling at amd.com
Wed Aug 21 20:01:07 UTC 2024


On 2024-08-21 08:03, Christian König wrote:
> This patch tries to solve the basic problem we also need to sync to
> the KFD fences of the BO because otherwise it can be that we clear
> PTEs while the KFD queues are still running.

This is going to trigger a lot of phantom KFD evictions and will tank 
performance. It's probably not what you intended.

Regards,
   Felix


>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 30 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  6 +++++
>   3 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index bdf1ef825d89..c586ab4c911b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>   	return 0;
>   }
>   
> +/**
> + * amdgpu_sync_kfd - sync to KFD fences
> + *
> + * @sync: sync object to add KFD fences to
> + * @resv: reservation object with KFD fences
> + *
> + * Extract all KFD fences and add them to the sync object.
> + */
> +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
> +{
> +	struct dma_resv_iter cursor;
> +	struct dma_fence *f;
> +	int r = 0;
> +
> +	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
> +	dma_resv_for_each_fence_unlocked(&cursor, f) {
> +		void *fence_owner = amdgpu_sync_get_owner(f);
> +
> +		if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
> +			continue;
> +
> +		r = amdgpu_sync_fence(sync, f);
> +		if (r)
> +			break;
> +	}
> +	dma_resv_iter_end(&cursor);
> +
> +	return r;
> +}
> +
>   /* Free the entry back to the slab */
>   static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index cf1e9e858efd..e3272dce798d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
>   int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>   		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
>   		     void *owner);
> +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
>   struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>   				     struct amdgpu_ring *ring);
>   struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ba99d428610a..13d429b91327 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1168,6 +1168,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   				     AMDGPU_SYNC_EQ_OWNER, vm);
>   		if (r)
>   			goto error_free;
> +		if (bo) {
> +			r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
> +			if (r)
> +				goto error_free;
> +		}
> +
>   	} else {
>   		struct drm_gem_object *obj = &bo->tbo.base;
>   


More information about the amd-gfx mailing list