[PATCH 4/5] drm/sched: Re-group and rename the entity run-queue lock

Tvrtko Ursulin tvrtko.ursulin at igalia.com
Tue Oct 15 13:16:19 UTC 2024


On 15/10/2024 12:56, Philipp Stanner wrote:
> On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>>
>> Christian suggested to rename the lock and improve the documentation
> 
> Let's move it to Annotators:
> Suggested-by: Christian König <christian.koenig at amd.com>

Ack.

> (Otherwise some time in the future a Christian Kaiser might start
> working on the scheduler and steal the praise ^^)
> 
>> of
>> what it protects.
> 
> So without Christian's name here I'd phrase it as:
> "When writing to a drm_sched_entity's run-queue, writers are protected
> through the lock drm_sched_entity.rq_lock. This naming, however,
> frequently collides with the separate internal lock of struct
> drm_sched_rq, resulting in uses like this:
> 
> 	spin_lock(&entity->rq_lock);
> 	spin_lock(&entity->rq->lock);
> 
> Rename drm_sched_entity.rq_lock to improve readability. While at it,
> re-order that struct's members to make it more obvious what the lock
> protects.

Will copy&paste - thanks for typing it out.

>> And to also re-order the structure members so all
>> protected by the lock are together in a block.
> 
> 
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> Cc: Christian König <christian.koenig at amd.com>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Luben Tuikov <ltuikov89 at gmail.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: Philipp Stanner <pstanner at redhat.com>
>> Reviewed-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++----------
>> --
>>   drivers/gpu/drm/scheduler/sched_main.c   |  2 +-
>>   include/drm/gpu_scheduler.h              | 15 +++++++------
>>   3 files changed, 23 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
>> b/drivers/gpu/drm/scheduler/sched_entity.c
>> index b72cba292839..c013c2b49aa5 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity
>> *entity,
>>   	/* We start in an idle state. */
>>   	complete_all(&entity->entity_idle);
>>   
>> -	spin_lock_init(&entity->rq_lock);
>> +	spin_lock_init(&entity->lock);
>>   	spsc_queue_init(&entity->job_queue);
>>   
>>   	atomic_set(&entity->fence_seq, 0);
>> @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct
>> drm_sched_entity *entity,
>>   {
>>   	WARN_ON(!num_sched_list || !sched_list);
>>   
>> -	spin_lock(&entity->rq_lock);
>> +	spin_lock(&entity->lock);
>>   	entity->sched_list = sched_list;
>>   	entity->num_sched_list = num_sched_list;
>> -	spin_unlock(&entity->rq_lock);
>> +	spin_unlock(&entity->lock);
>>   }
>>   EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>>   
>> @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct
>> drm_sched_entity *entity)
>>   	if (!entity->rq)
>>   		return;
>>   
>> -	spin_lock(&entity->rq_lock);
>> +	spin_lock(&entity->lock);
>>   	entity->stopped = true;
>>   	drm_sched_rq_remove_entity(entity->rq, entity);
>> -	spin_unlock(&entity->rq_lock);
>> +	spin_unlock(&entity->lock);
>>   
>>   	/* Make sure this entity is not used by the scheduler at the
>> moment */
>>   	wait_for_completion(&entity->entity_idle);
>> @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct
>> dma_fence *f,
>>   void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>   				   enum drm_sched_priority priority)
>>   {
>> -	spin_lock(&entity->rq_lock);
>> +	spin_lock(&entity->lock);
>>   	entity->priority = priority;
>> -	spin_unlock(&entity->rq_lock);
>> +	spin_unlock(&entity->lock);
>>   }
>>   EXPORT_SYMBOL(drm_sched_entity_set_priority);
>>   
>> @@ -515,10 +515,10 @@ struct drm_sched_job
>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>   
>>   		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>>   		if (next) {
>> -			spin_lock(&entity->rq_lock);
>> +			spin_lock(&entity->lock);
>>   			drm_sched_rq_update_fifo_locked(entity,
>>   							next->submit_ts);
>> -			spin_unlock(&entity->rq_lock);
>> +			spin_unlock(&entity->lock);
>>   		}
>>   	}
>>   
>> @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct
>> drm_sched_entity *entity)
>>   	if (fence && !dma_fence_is_signaled(fence))
>>   		return;
>>   
>> -	spin_lock(&entity->rq_lock);
>> +	spin_lock(&entity->lock);
>>   	sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
>>   	rq = sched ? sched->sched_rq[entity->priority] : NULL;
>>   	if (rq != entity->rq) {
>>   		drm_sched_rq_remove_entity(entity->rq, entity);
>>   		entity->rq = rq;
>>   	}
>> -	spin_unlock(&entity->rq_lock);
>> +	spin_unlock(&entity->lock);
>>   
>>   	if (entity->num_sched_list == 1)
>>   		entity->sched_list = NULL;
>> @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct
>> drm_sched_job *sched_job)
>>   		struct drm_sched_rq *rq;
>>   
>>   		/* Add the entity to the run queue */
>> -		spin_lock(&entity->rq_lock);
>> +		spin_lock(&entity->lock);
>>   		if (entity->stopped) {
>> -			spin_unlock(&entity->rq_lock);
>> +			spin_unlock(&entity->lock);
>>   
>>   			DRM_ERROR("Trying to push to a killed
>> entity\n");
>>   			return;
>> @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct
>> drm_sched_job *sched_job)
>>   		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>   			drm_sched_rq_update_fifo_locked(entity,
>> submit_ts);
>>   
>> -		spin_unlock(&entity->rq_lock);
>> +		spin_unlock(&entity->lock);
>>   
>>   		drm_sched_wakeup(sched);
>>   	}
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 07ee386b8e4b..2670bf9f34b2 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct
>> drm_sched_entity *entity, ktime_t ts
>>   	 * for entity from within concurrent
>> drm_sched_entity_select_rq and the
>>   	 * other to update the rb tree structure.
>>   	 */
>> -	lockdep_assert_held(&entity->rq_lock);
>> +	lockdep_assert_held(&entity->lock);
>>   
>>   	spin_lock(&entity->rq->lock);
>>   
>> diff --git a/include/drm/gpu_scheduler.h
>> b/include/drm/gpu_scheduler.h
>> index b6d095074c19..683fff8939e4 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -96,6 +96,14 @@ struct drm_sched_entity {
>>   	 */
>>   	struct list_head		list;
>>   
> 
> Uh, btw, while reviewing, I just saw that we still have that FIXME
> further up:
> 
> 	/**
> 	 * @rq:
> 	 *
> 	 * Runqueue on which this entity is currently scheduled.
> 	 *
> 	 * FIXME: Locking is very unclear for this. Writers are protected by
> 	 * @rq_lock, but readers are generally lockless and seem to just race
> 	 * with not even a READ_ONCE.
> 	 */
> 	struct drm_sched_rq		*rq;
> 
> At the very least, rq_lock should be renamed here, too. AFAICS the

Good catch!

> series doesn't solve the FIXME, so we keep it, agreed?

Yep.

Regards,

Tvrtko

>> +	/**
>> +	 * @lock:
>> +	 *
>> +	 * Lock protecting the run-queue (@rq) to which this entity
>> belongs,
>> +	 * @priority and the list of schedulers (@sched_list,
>> @num_sched_list).
>> +	 */
>> +	spinlock_t			lock;
>> +
>>   	/**
>>   	 * @rq:
>>   	 *
>> @@ -140,13 +148,6 @@ struct drm_sched_entity {
>>   	 */
> 
> I think this comment here above also uses the term "rq_lock". While
> you're fixing it, maybe also do a quick grep for "rq_lock" in case I
> overlooked it somewhere else. I stopped drinking coffee today, so...
> 
> 
> Thx,
> P.
> 
>>   	enum drm_sched_priority         priority;
>>   
>> -	/**
>> -	 * @rq_lock:
>> -	 *
>> -	 * Lock to modify the runqueue to which this entity belongs.
>> -	 */
>> -	spinlock_t			rq_lock;
>> -
>>   	/**
>>   	 * @job_queue: the list of jobs of this entity.
>>   	 */
> 


More information about the dri-devel mailing list