[PATCH v7 11/12] drm/amdgpu: introduce userqueue eviction fence

Christian König christian.koenig at amd.com
Thu Oct 26 12:37:31 UTC 2023


Am 10.10.23 um 12:17 schrieb Shashank Sharma:
> This patch adds support for userqueue eviction fences. In general, when
> a process wants to map VRAM memory but TTM can't find enough space, it
> attempts to evict BOs from its LRU list. This fence will prevent the TTM
> manager from evicting the process's BOs from VRAM.

Maybe phrase that the other way around: the eviction fence unmaps the 
userq when memory management requests it.

>
> The general idea behind this is:
> - Eviction fence is initialized during the uq_mgr init and saved in
>    fpriv->uq_mgr.
> - This fence is attached to every userqueue object (MQD, ctx, doorbell
>    and wptr) in a shared way, during the queue creation.
> - The fence is signaled during the queue destruction.
>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Felix Kuehling <felix.kuehling at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> Signed-off-by: Arvind Yadav <arvind.yadav at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 82 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        |  7 ++
>   .../gpu/drm/amd/include/amdgpu_userqueue.h    | 15 ++++
>   3 files changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 6bae014b248e..26cdd54acd74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -21,7 +21,7 @@
>    * OTHER DEALINGS IN THE SOFTWARE.
>    *
>    */
> -
> +#include <linux/atomic.h>
>   #include "amdgpu.h"
>   #include "amdgpu_vm.h"
>   #include "amdgpu_userqueue.h"
> @@ -45,6 +45,66 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
>   	return idr_find(&uq_mgr->userq_idr, qid);
>   }
>   
> +static const char *
> +amdgpu_userqueue_fence_get_driver_name(struct dma_fence *fence)
> +{
> +	return "amdgpu";
> +}
> +
> +static const char *
> +amdgpu_userqueue_fence_get_timeline_name(struct dma_fence *f)
> +{
> +	struct amdgpu_userq_fence *ef = container_of(f, struct amdgpu_userq_fence, base);
> +
> +	return ef->timeline_name;
> +}
> +
> +static const struct dma_fence_ops amdgpu_userqueue_eviction_fence_ops = {
> +	.use_64bit_seqno = true,
> +	.get_driver_name = amdgpu_userqueue_fence_get_driver_name,
> +	.get_timeline_name = amdgpu_userqueue_fence_get_timeline_name,
> +};
> +
> +static void
> +amdgpu_userqueue_init_eviction_fence(struct amdgpu_userq_mgr *uq_mgr)
> +{
> +	struct amdgpu_userq_fence *fence = &uq_mgr->eviction_fence;
> +	atomic_t seq = ATOMIC_INIT(0);
> +
> +	spin_lock_init(&fence->lock);
> +	fence->fence_ctx = dma_fence_context_alloc(1);

Not much of an issue, but that should be per userq_mgr.

> +	fence->seq = seq;

Same here, the seq needs to be in the userq_mgr and not inside the fence 
itself.

> +	get_task_comm(fence->timeline_name, current);
> +	dma_fence_init(&fence->base, &amdgpu_userqueue_eviction_fence_ops,
> +			&fence->lock, fence->fence_ctx,
> +			atomic_inc_return(&fence->seq));
> +}
> +
> +struct dma_fence *
> +amdgpu_userqueue_attach_eviction_fence(struct amdgpu_userq_mgr *uq_mgr,
> +				       struct amdgpu_bo *bo)
> +{
> +	struct dma_fence *ef = &uq_mgr->eviction_fence.base;
> +	struct dma_resv *resv = bo->tbo.base.resv;
> +	int ret;
> +
> +	ret = dma_resv_reserve_fences(resv, 1);

That should usually be done while locking the BO and not later on.

The problem is that when you have multiple BOs to add the fence to, you 
can't risk adding it only to some of them.

> +	if (ret) {
> +		dma_fence_wait(ef, false);
> +		return NULL;
> +	}
> +
> +	dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_READ);

Please use DMA_RESV_USAGE_BOOKKEEP for this.

> +	return dma_fence_get(ef);

Why do you need to return the fence here?

> +}
> +
> +void
> +amdgpu_userqueue_signal_eviction_fence(struct dma_fence *ef)
> +{
> +	dma_fence_signal(ef);
> +	dma_fence_put(ef);
> +}
> +
>   int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
>   				   struct amdgpu_userq_obj *userq_obj,
>   				   int size)
> @@ -88,6 +148,13 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
>   	}
>   
>   	userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
> +	userq_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, userq_obj->obj);
> +	if (!userq_obj->ev_fence) {
> +		DRM_ERROR("Failed to attach eviction fence to FW object\n");
> +		r = -EFAULT;
> +		goto unresv;
> +	}
> +

We don't need to keep the ev_fence around here.

>   	amdgpu_bo_unreserve(userq_obj->obj);
>   	memset(userq_obj->cpu_ptr, 0, size);
>   	return 0;
> @@ -103,6 +170,7 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
>   void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
>   				   struct amdgpu_userq_obj *userq_obj)
>   {
> +	amdgpu_userqueue_signal_eviction_fence(userq_obj->ev_fence);
>   	amdgpu_bo_kunmap(userq_obj->obj);
>   	amdgpu_bo_unref(&userq_obj->obj);
>   }
> @@ -140,11 +208,21 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
>   		goto unpin_bo;
>   	}
>   
> +	db_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, db_obj->obj);
> +	if (!db_obj->ev_fence) {
> +		DRM_ERROR("[Usermode queues] Failed to attach eviction fence with db_bo\n");
> +		r = -EFAULT;
> +		goto unres_bo;
> +	}
> +
>   	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, doorbell_offset);
>   	DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index);
>   	amdgpu_bo_unreserve(db_obj->obj);
>   	return index;
>   
> +unres_bo:
> +	amdgpu_bo_unreserve(db_obj->obj);
> +
>   unpin_bo:
>   	amdgpu_bo_unpin(db_obj->obj);
>   
> @@ -171,6 +249,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
>   
>   	amdgpu_bo_unpin(queue->db_obj.obj);
>   	amdgpu_bo_unref(&queue->db_obj.obj);
> +	amdgpu_userqueue_signal_eviction_fence(queue->db_obj.ev_fence);
>   	amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
>   	mutex_unlock(&uq_mgr->userq_mutex);
>   	return 0;
> @@ -278,6 +357,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
>   	idr_init_base(&userq_mgr->userq_idr, 1);
>   	userq_mgr->adev = adev;
>   
> +	amdgpu_userqueue_init_eviction_fence(userq_mgr);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 33de65a0d974..e68320b4c689 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -6469,6 +6469,12 @@ gfx_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
>   		return ret;
>   	}
>   
> +	wptr_obj->ev_fence = amdgpu_userqueue_attach_eviction_fence(uq_mgr, wptr_obj->obj);

Again you don't need to keep the ev_fence around here. The dma_resv 
object will keep the reference.

Looks like a start, but quite a bunch of stuff to do.

> +	if (!wptr_obj->ev_fence) {
> +		DRM_ERROR("Failed to attach eviction fence to wptr bo\n");
> +		return -EFAULT;
> +	}
> +
>   	queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
>   	return 0;
>   }
> @@ -6650,6 +6656,7 @@ static void
>   gfx_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
>   {
>   	gfx_v11_0_userq_unmap(uq_mgr, queue);
> +	amdgpu_userqueue_signal_eviction_fence(queue->wptr_obj.ev_fence);
>   	amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
>   	amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
>   }
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> index a653e31350c5..f1e3d311ae18 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> @@ -33,6 +33,7 @@ struct amdgpu_userq_obj {
>   	void		 *cpu_ptr;
>   	uint64_t	 gpu_addr;
>   	struct amdgpu_bo *obj;
> +	struct dma_fence *ev_fence;
>   };
>   
>   struct amdgpu_usermode_queue {
> @@ -57,11 +58,20 @@ struct amdgpu_userq_funcs {
>   			    struct amdgpu_usermode_queue *uq);
>   };
>   
> +struct amdgpu_userq_fence {
> +	u64		 fence_ctx;
> +	atomic_t	 seq;
> +	spinlock_t	 lock;
> +	struct dma_fence base;
> +	char		 timeline_name[TASK_COMM_LEN];
> +};
> +
>   /* Usermode queues for gfx */
>   struct amdgpu_userq_mgr {
>   	struct idr			userq_idr;
>   	struct mutex			userq_mutex;
>   	struct amdgpu_device		*adev;
> +	struct amdgpu_userq_fence	eviction_fence;
>   };
>   
>   int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> @@ -76,4 +86,9 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
>   
>   void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
>   				     struct amdgpu_userq_obj *userq_obj);
> +
> +struct dma_fence *amdgpu_userqueue_attach_eviction_fence(struct amdgpu_userq_mgr *uq_mgr,
> +							 struct amdgpu_bo *bo);
> +
> +void amdgpu_userqueue_signal_eviction_fence(struct dma_fence *ef);
>   #endif



More information about the amd-gfx mailing list