[PATCH drm-misc-next v5 4/6] drm/gpuvm: track/lock/validate external/evicted objects

Thu Oct 5 11:55:22 UTC 2023

On 9/28/23 21:16, Danilo Krummrich wrote:
> Currently the DRM GPUVM offers common infrastructure to track GPU VA
> allocations and mappings, generically connect GPU VA mappings to their
> backing buffers and perform more complex mapping operations on the GPU VA
> space.
>
> However, there are more design patterns commonly used by drivers, which
> can potentially be generalized in order to make the DRM GPUVM represent
> a basis for GPU-VM implementations. In this context, this patch aims
> at generalizing the following elements.
>
> 1) Provide a common dma-resv for GEM objects not being used outside of
>     this GPU-VM.
>
> 2) Provide tracking of external GEM objects (GEM objects which are
>     shared with other GPU-VMs).
>
> 3) Provide functions to efficiently lock all GEM objects dma-resv the
>     GPU-VM contains mappings of.
>
> 4) Provide tracking of evicted GEM objects the GPU-VM contains mappings
>     of, such that validation of evicted GEM objects is accelerated.
>
> 5) Provide some convinience functions for common patterns.
>
> Big thanks to Boris Brezillon for his help to figure out locking for
> drivers updating the GPU VA space within the fence signalling path.
>
> Suggested-by: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Danilo Krummrich <dakr at redhat.com>
> ---
>   drivers/gpu/drm/drm_gpuvm.c | 642 ++++++++++++++++++++++++++++++++++++
>   include/drm/drm_gpuvm.h     | 240 ++++++++++++++
>   2 files changed, 882 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
> index 27100423154b..770bb3d68d1f 100644
> --- a/drivers/gpu/drm/drm_gpuvm.c
> +++ b/drivers/gpu/drm/drm_gpuvm.c
> @@ -82,6 +82,21 @@
>    * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this
>    * particular combination. If not existent a new instance is created and linked
>    * to the &drm_gem_object.
> + *
> + * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used
> + * as entry for the &drm_gpuvm's lists of external and evicted objects. Those
> + * list are maintained in order to accelerate locking of dma-resv locks and
> + * validation of evicted objects bound in a &drm_gpuvm. For instance, all
> + * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling
> + * drm_gpuvm_exec_lock(). Once locked drivers can call drm_gpuvm_validate() in
> + * order to validate all evicted &drm_gem_objects. It is also possible to lock
> + * additional &drm_gem_objects by providing the corresponding parameters to
> + * drm_gpuvm_exec_lock() as well as open code the &drm_exec loop while making
> + * use of helper functions such as drm_gpuvm_prepare_range() or
> + * drm_gpuvm_prepare_objects().
> + *
> + * Every bound &drm_gem_object is treated as external object when its &dma_resv
> + * structure is different than the &drm_gpuvm's common &dma_resv structure.
>    */
>   
>   /**
> @@ -429,6 +444,20 @@
>    * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and
>    * &drm_gem_object must be able to observe previous creations and destructions
>    * of &drm_gpuvm_bos in order to keep instances unique.
> + *
> + * The &drm_gpuvm's lists for keeping track of external and evicted objects are
> + * protected against concurrent insertion / removal and iteration internally.
> + *
> + * However, drivers still need ensure to protect concurrent calls to functions
> + * iterating those lists, namely drm_gpuvm_prepare_objects() and
> + * drm_gpuvm_validate().
> + *
> + * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate
> + * that the corresponding &dma_resv locks are held in order to protect the
> + * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and
> + * the corresponding lockdep checks are enabled. This is an optimization for
> + * drivers which are capable of taking the corresponding &dma_resv locks and
> + * hence do not require internal locking.
>    */
>   
>   /**
> @@ -641,6 +670,195 @@
>    *	}
>    */
>   
> +/**
> + * get_next_vm_bo_from_list() - get the next vm_bo element
> + * @__gpuvm: The GPU VM
> + * @__list_name: The name of the list we're iterating on
> + * @__local_list: A pointer to the local list used to store already iterated items
> + * @__prev_vm_bo: The previous element we got from drm_gpuvm_get_next_cached_vm_bo()
> + *
> + * This helper is here to provide lockless list iteration. Lockless as in, the
> + * iterator releases the lock immediately after picking the first element from
> + * the list, so list insertion deletion can happen concurrently.
> + *
> + * Elements popped from the original list are kept in a local list, so removal
> + * and is_empty checks can still happen while we're iterating the list.
> + */
> +#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo)	\
> +	({										\
> +		struct drm_gpuvm_bo *__vm_bo = NULL;					\
> +											\
> +		drm_gpuvm_bo_put(__prev_vm_bo);						\
> +											\
> +		spin_lock(&(__gpuvm)->__list_name.lock);				\
> +		if (!(__gpuvm)->__list_name.local_list)					\
> +			(__gpuvm)->__list_name.local_list = __local_list;		\
> +		else									\
> +			WARN_ON((__gpuvm)->__list_name.local_list != __local_list);	\
> +											\
> +		while (!list_empty(&(__gpuvm)->__list_name.list)) {			\
> +			__vm_bo = list_first_entry(&(__gpuvm)->__list_name.list,	\
> +						   struct drm_gpuvm_bo,			\
> +						   list.entry.__list_name);		\
> +			if (kref_get_unless_zero(&__vm_bo->kref)) {			\
> +				list_move_tail(&(__vm_bo)->list.entry.__list_name,	\
> +					       __local_list);				\
> +				break;							\
> +			} else {							\
> +				list_del_init(&(__vm_bo)->list.entry.__list_name);	\
> +				__vm_bo = NULL;						\
> +			}								\
> +		}									\
> +		spin_unlock(&(__gpuvm)->__list_name.lock);				\
> +											\
> +		__vm_bo;								\
> +	})
> +
> +/**
> + * for_each_vm_bo_in_list() - internal vm_bo list iterator
> + *
> + * This helper is here to provide lockless list iteration. Lockless as in, the
> + * iterator releases the lock immediately after picking the first element from the
> + * list, hence list insertion and deletion can happen concurrently.
> + *
> + * It is not allowed to re-assign the vm_bo pointer from inside this loop.
> + *
> + * Typical use:
> + *
> + *	struct drm_gpuvm_bo *vm_bo;
> + *	LIST_HEAD(my_local_list);
> + *
> + *	ret = 0;
> + *	for_each_vm_bo_in_list(gpuvm, <list_name>, &my_local_list, vm_bo) {
> + *		ret = do_something_with_vm_bo(..., vm_bo);
> + *		if (ret)
> + *			break;
> + *	}
> + *	drm_gpuvm_bo_put(vm_bo);
> + *	restore_vm_bo_list(gpuvm, <list_name>, &my_local_list);
> + *
> + *
> + * Only used for internal list iterations, not meant to be exposed to the outside
> + * world.
> + */
> +#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo)	\
> +	for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name,		\
> +						__local_list, NULL);		\
> +	     __vm_bo;								\
> +	     __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name,		\
> +						__local_list, __vm_bo))
> +
> +static inline void
> +__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock,
> +		     struct list_head *list, struct list_head **local_list)
s/static inline void/static void/?  In .c files, the compiler is 
typically trusted to inline where needed.

/Thomas