[PATCH 4/5] drm/sched: Re-group and rename the entity run-queue lock
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Tue Oct 15 13:16:19 UTC 2024
On 15/10/2024 12:56, Philipp Stanner wrote:
> On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>>
>> Christian suggested to rename the lock and improve the documentation
>
> Let's move it to Annotators:
> Suggested-by: Christian König <christian.koenig at amd.com>
Ack.
> (Otherwise some time in the future a Christian Kaiser might start
> working on the scheduler and steal the praise ^^)
>
>> of
>> what it protects.
>
> So without Christian's name here I'd phrase it as:
> "When writing to a drm_sched_entity's run-queue, writers are protected
> through the lock drm_sched_entity.rq_lock. This naming, however,
> frequently collides with the separate internal lock of struct
> drm_sched_rq, resulting in uses like this:
>
> spin_lock(&entity->rq_lock);
> spin_lock(&entity->rq->lock);
>
> Rename drm_sched_entity.rq_lock to improve readability. While at it,
> re-order that struct's members to make it more obvious what the lock
> protects."
Will copy&paste - thanks for typing it out.
>> And to also re-order the structure members so all
>> protected by the lock are together in a block.
>
>
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> Cc: Christian König <christian.koenig at amd.com>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Luben Tuikov <ltuikov89 at gmail.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: Philipp Stanner <pstanner at redhat.com>
>> Reviewed-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++----------
>> --
>> drivers/gpu/drm/scheduler/sched_main.c | 2 +-
>> include/drm/gpu_scheduler.h | 15 +++++++------
>> 3 files changed, 23 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
>> b/drivers/gpu/drm/scheduler/sched_entity.c
>> index b72cba292839..c013c2b49aa5 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity
>> *entity,
>> /* We start in an idle state. */
>> complete_all(&entity->entity_idle);
>>
>> - spin_lock_init(&entity->rq_lock);
>> + spin_lock_init(&entity->lock);
>> spsc_queue_init(&entity->job_queue);
>>
>> atomic_set(&entity->fence_seq, 0);
>> @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct
>> drm_sched_entity *entity,
>> {
>> WARN_ON(!num_sched_list || !sched_list);
>>
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> entity->sched_list = sched_list;
>> entity->num_sched_list = num_sched_list;
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>> }
>> EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>>
>> @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct
>> drm_sched_entity *entity)
>> if (!entity->rq)
>> return;
>>
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> entity->stopped = true;
>> drm_sched_rq_remove_entity(entity->rq, entity);
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>>
>> /* Make sure this entity is not used by the scheduler at the
>> moment */
>> wait_for_completion(&entity->entity_idle);
>> @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct
>> dma_fence *f,
>> void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>> enum drm_sched_priority priority)
>> {
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> entity->priority = priority;
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>> }
>> EXPORT_SYMBOL(drm_sched_entity_set_priority);
>>
>> @@ -515,10 +515,10 @@ struct drm_sched_job
>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>
>> next = to_drm_sched_job(spsc_queue_peek(&entity-
>>> job_queue));
>> if (next) {
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> drm_sched_rq_update_fifo_locked(entity,
>> next-
>>> submit_ts);
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>> }
>> }
>>
>> @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct
>> drm_sched_entity *entity)
>> if (fence && !dma_fence_is_signaled(fence))
>> return;
>>
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> sched = drm_sched_pick_best(entity->sched_list, entity-
>>> num_sched_list);
>> rq = sched ? sched->sched_rq[entity->priority] : NULL;
>> if (rq != entity->rq) {
>> drm_sched_rq_remove_entity(entity->rq, entity);
>> entity->rq = rq;
>> }
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>>
>> if (entity->num_sched_list == 1)
>> entity->sched_list = NULL;
>> @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct
>> drm_sched_job *sched_job)
>> struct drm_sched_rq *rq;
>>
>> /* Add the entity to the run queue */
>> - spin_lock(&entity->rq_lock);
>> + spin_lock(&entity->lock);
>> if (entity->stopped) {
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>>
>> DRM_ERROR("Trying to push to a killed
>> entity\n");
>> return;
>> @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct
>> drm_sched_job *sched_job)
>> if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>> drm_sched_rq_update_fifo_locked(entity,
>> submit_ts);
>>
>> - spin_unlock(&entity->rq_lock);
>> + spin_unlock(&entity->lock);
>>
>> drm_sched_wakeup(sched);
>> }
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 07ee386b8e4b..2670bf9f34b2 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct
>> drm_sched_entity *entity, ktime_t ts
>> * for entity from within concurrent
>> drm_sched_entity_select_rq and the
>> * other to update the rb tree structure.
>> */
>> - lockdep_assert_held(&entity->rq_lock);
>> + lockdep_assert_held(&entity->lock);
>>
>> spin_lock(&entity->rq->lock);
>>
>> diff --git a/include/drm/gpu_scheduler.h
>> b/include/drm/gpu_scheduler.h
>> index b6d095074c19..683fff8939e4 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -96,6 +96,14 @@ struct drm_sched_entity {
>> */
>> struct list_head list;
>>
>
> Uh, btw, while reviewing, I just saw that we still have that FIXME
> further up:
>
> /**
> * @rq:
> *
> * Runqueue on which this entity is currently scheduled.
> *
> * FIXME: Locking is very unclear for this. Writers are protected by
> * @rq_lock, but readers are generally lockless and seem to just race
> * with not even a READ_ONCE.
> */
> struct drm_sched_rq *rq;
>
> At the very least, rq_lock should be renamed here, too. AFAICS the
Good catch!
> series doesn't solve the FIXME, so we keep it, agreed?
Yep.
Regards,
Tvrtko
>> + /**
>> + * @lock:
>> + *
>> + * Lock protecting the run-queue (@rq) to which this entity
>> belongs,
>> + * @priority and the list of schedulers (@sched_list,
>> @num_sched_list).
>> + */
>> + spinlock_t lock;
>> +
>> /**
>> * @rq:
>> *
>> @@ -140,13 +148,6 @@ struct drm_sched_entity {
>> */
>
> I think this comment here above also uses the term "rq_lock". While
> you're fixing it, maybe also do a quick grep for "rq_lock" in case I
> overlooked it somewhere else. I stopped drinking coffee today, so...
>
>
> Thx,
> P.
>
>> enum drm_sched_priority priority;
>>
>> - /**
>> - * @rq_lock:
>> - *
>> - * Lock to modify the runqueue to which this entity belongs.
>> - */
>> - spinlock_t rq_lock;
>> -
>> /**
>> * @job_queue: the list of jobs of this entity.
>> */
>
More information about the dri-devel
mailing list