[PATCH 4/5] drm/sched: Re-group and rename the entity run-queue lock

Philipp Stanner pstanner at redhat.com
Tue Oct 15 11:56:28 UTC 2024


On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> 
> Christian suggested to rename the lock and improve the documentation

Let's move that credit into the commit tags instead:
Suggested-by: Christian König <christian.koenig at amd.com>

(Otherwise some time in the future a Christian Kaiser might start
working on the scheduler and steal the praise ^^)

> of
> what it protects. 

So without Christian's name here I'd phrase it as:
"When writing to a drm_sched_entity's run-queue, writers are protected
through the lock drm_sched_entity.rq_lock. This naming, however,
frequently collides with the separate internal lock of struct
drm_sched_rq, resulting in uses like this:

	spin_lock(&entity->rq_lock);
	spin_lock(&entity->rq->lock);

Rename drm_sched_entity.rq_lock to improve readability. While at it,
re-order that struct's members to make it more obvious what the lock
protects."

> And to also re-order the structure members so all
> protected by the lock are together in a block.


> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Luben Tuikov <ltuikov89 at gmail.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Philipp Stanner <pstanner at redhat.com>
> Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++----------
> --
>  drivers/gpu/drm/scheduler/sched_main.c   |  2 +-
>  include/drm/gpu_scheduler.h              | 15 +++++++------
>  3 files changed, 23 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
> b/drivers/gpu/drm/scheduler/sched_entity.c
> index b72cba292839..c013c2b49aa5 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity
> *entity,
>  	/* We start in an idle state. */
>  	complete_all(&entity->entity_idle);
>  
> -	spin_lock_init(&entity->rq_lock);
> +	spin_lock_init(&entity->lock);
>  	spsc_queue_init(&entity->job_queue);
>  
>  	atomic_set(&entity->fence_seq, 0);
> @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct
> drm_sched_entity *entity,
>  {
>  	WARN_ON(!num_sched_list || !sched_list);
>  
> -	spin_lock(&entity->rq_lock);
> +	spin_lock(&entity->lock);
>  	entity->sched_list = sched_list;
>  	entity->num_sched_list = num_sched_list;
> -	spin_unlock(&entity->rq_lock);
> +	spin_unlock(&entity->lock);
>  }
>  EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>  
> @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct
> drm_sched_entity *entity)
>  	if (!entity->rq)
>  		return;
>  
> -	spin_lock(&entity->rq_lock);
> +	spin_lock(&entity->lock);
>  	entity->stopped = true;
>  	drm_sched_rq_remove_entity(entity->rq, entity);
> -	spin_unlock(&entity->rq_lock);
> +	spin_unlock(&entity->lock);
>  
>  	/* Make sure this entity is not used by the scheduler at the
> moment */
>  	wait_for_completion(&entity->entity_idle);
> @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct
> dma_fence *f,
>  void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>  				   enum drm_sched_priority priority)
>  {
> -	spin_lock(&entity->rq_lock);
> +	spin_lock(&entity->lock);
>  	entity->priority = priority;
> -	spin_unlock(&entity->rq_lock);
> +	spin_unlock(&entity->lock);
>  }
>  EXPORT_SYMBOL(drm_sched_entity_set_priority);
>  
> @@ -515,10 +515,10 @@ struct drm_sched_job
> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  
>  		next = to_drm_sched_job(spsc_queue_peek(&entity-
> >job_queue));
>  		if (next) {
> -			spin_lock(&entity->rq_lock);
> +			spin_lock(&entity->lock);
>  			drm_sched_rq_update_fifo_locked(entity,
>  							next-
> >submit_ts);
> -			spin_unlock(&entity->rq_lock);
> +			spin_unlock(&entity->lock);
>  		}
>  	}
>  
> @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct
> drm_sched_entity *entity)
>  	if (fence && !dma_fence_is_signaled(fence))
>  		return;
>  
> -	spin_lock(&entity->rq_lock);
> +	spin_lock(&entity->lock);
>  	sched = drm_sched_pick_best(entity->sched_list, entity-
> >num_sched_list);
>  	rq = sched ? sched->sched_rq[entity->priority] : NULL;
>  	if (rq != entity->rq) {
>  		drm_sched_rq_remove_entity(entity->rq, entity);
>  		entity->rq = rq;
>  	}
> -	spin_unlock(&entity->rq_lock);
> +	spin_unlock(&entity->lock);
>  
>  	if (entity->num_sched_list == 1)
>  		entity->sched_list = NULL;
> @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct
> drm_sched_job *sched_job)
>  		struct drm_sched_rq *rq;
>  
>  		/* Add the entity to the run queue */
> -		spin_lock(&entity->rq_lock);
> +		spin_lock(&entity->lock);
>  		if (entity->stopped) {
> -			spin_unlock(&entity->rq_lock);
> +			spin_unlock(&entity->lock);
>  
>  			DRM_ERROR("Trying to push to a killed
> entity\n");
>  			return;
> @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct
> drm_sched_job *sched_job)
>  		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>  			drm_sched_rq_update_fifo_locked(entity,
> submit_ts);
>  
> -		spin_unlock(&entity->rq_lock);
> +		spin_unlock(&entity->lock);
>  
>  		drm_sched_wakeup(sched);
>  	}
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 07ee386b8e4b..2670bf9f34b2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct
> drm_sched_entity *entity, ktime_t ts
>  	 * for entity from within concurrent
> drm_sched_entity_select_rq and the
>  	 * other to update the rb tree structure.
>  	 */
> -	lockdep_assert_held(&entity->rq_lock);
> +	lockdep_assert_held(&entity->lock);
>  
>  	spin_lock(&entity->rq->lock);
>  
> diff --git a/include/drm/gpu_scheduler.h
> b/include/drm/gpu_scheduler.h
> index b6d095074c19..683fff8939e4 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -96,6 +96,14 @@ struct drm_sched_entity {
>  	 */
>  	struct list_head		list;
>  

Uh, btw, while reviewing, I just saw that we still have that FIXME
further up:

	/**
	 * @rq:
	 *
	 * Runqueue on which this entity is currently scheduled.
	 *
	 * FIXME: Locking is very unclear for this. Writers are protected by
	 * @rq_lock, but readers are generally lockless and seem to just race
	 * with not even a READ_ONCE.
	 */
	struct drm_sched_rq		*rq;

At the very least, rq_lock should be renamed here, too. AFAICS the
series doesn't solve the FIXME, so we keep it, agreed?


> +	/**
> +	 * @lock:
> +	 *
> +	 * Lock protecting the run-queue (@rq) to which this entity
> belongs,
> +	 * @priority and the list of schedulers (@sched_list,
> @num_sched_list).
> +	 */
> +	spinlock_t			lock;
> +
>  	/**
>  	 * @rq:
>  	 *
> @@ -140,13 +148,6 @@ struct drm_sched_entity {
>  	 */

I think this comment here above also uses the term "rq_lock". While
you're fixing it, maybe also do a quick grep for "rq_lock" in case I
overlooked it somewhere else. I stopped drinking coffee today, so...


Thx,
P.

>  	enum drm_sched_priority         priority;
>  
> -	/**
> -	 * @rq_lock:
> -	 *
> -	 * Lock to modify the runqueue to which this entity belongs.
> -	 */
> -	spinlock_t			rq_lock;
> -
>  	/**
>  	 * @job_queue: the list of jobs of this entity.
>  	 */



More information about the dri-devel mailing list