[PATCH 4/5] drm/sched: Re-group and rename the entity run-queue lock
Philipp Stanner
pstanner at redhat.com
Tue Oct 15 11:56:28 UTC 2024
On Mon, 2024-10-14 at 11:46 +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>
> Christian suggested to rename the lock and improve the documentation
Let's move it to Annotators:
Suggested-by: Christian König <christian.koenig at amd.com>
(Otherwise some time in the future a Christian Kaiser might start
working on the scheduler and steal the praise ^^)
> of
> what it protects.
So without Christian's name here I'd phrase it as:
"When writing to a drm_sched_entity's run-queue, writers are protected
through the lock drm_sched_entity.rq_lock. This naming, however,
frequently collides with the separate internal lock of struct
drm_sched_rq, resulting in uses like this:
	spin_lock(&entity->rq_lock);
	spin_lock(&entity->rq->lock);
Rename drm_sched_entity.rq_lock to improve readability. While at it,
re-order that struct's members to make it more obvious what the lock
protects."
> And to also re-order the structure members so all
> protected by the lock are together in a block.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Luben Tuikov <ltuikov89 at gmail.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Philipp Stanner <pstanner at redhat.com>
> Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++----------
> --
> drivers/gpu/drm/scheduler/sched_main.c | 2 +-
> include/drm/gpu_scheduler.h | 15 +++++++------
> 3 files changed, 23 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
> b/drivers/gpu/drm/scheduler/sched_entity.c
> index b72cba292839..c013c2b49aa5 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity
> *entity,
> /* We start in an idle state. */
> complete_all(&entity->entity_idle);
>
> - spin_lock_init(&entity->rq_lock);
> + spin_lock_init(&entity->lock);
> spsc_queue_init(&entity->job_queue);
>
> atomic_set(&entity->fence_seq, 0);
> @@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct
> drm_sched_entity *entity,
> {
> WARN_ON(!num_sched_list || !sched_list);
>
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> entity->sched_list = sched_list;
> entity->num_sched_list = num_sched_list;
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
> }
> EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>
> @@ -244,10 +244,10 @@ static void drm_sched_entity_kill(struct
> drm_sched_entity *entity)
> if (!entity->rq)
> return;
>
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> entity->stopped = true;
> drm_sched_rq_remove_entity(entity->rq, entity);
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
>
> /* Make sure this entity is not used by the scheduler at the
> moment */
> wait_for_completion(&entity->entity_idle);
> @@ -396,9 +396,9 @@ static void drm_sched_entity_wakeup(struct
> dma_fence *f,
> void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> enum drm_sched_priority priority)
> {
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> entity->priority = priority;
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
> }
> EXPORT_SYMBOL(drm_sched_entity_set_priority);
>
> @@ -515,10 +515,10 @@ struct drm_sched_job
> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>
> next = to_drm_sched_job(spsc_queue_peek(&entity-
> >job_queue));
> if (next) {
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> drm_sched_rq_update_fifo_locked(entity,
> next-
> >submit_ts);
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
> }
> }
>
> @@ -559,14 +559,14 @@ void drm_sched_entity_select_rq(struct
> drm_sched_entity *entity)
> if (fence && !dma_fence_is_signaled(fence))
> return;
>
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> sched = drm_sched_pick_best(entity->sched_list, entity-
> >num_sched_list);
> rq = sched ? sched->sched_rq[entity->priority] : NULL;
> if (rq != entity->rq) {
> drm_sched_rq_remove_entity(entity->rq, entity);
> entity->rq = rq;
> }
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
>
> if (entity->num_sched_list == 1)
> entity->sched_list = NULL;
> @@ -605,9 +605,9 @@ void drm_sched_entity_push_job(struct
> drm_sched_job *sched_job)
> struct drm_sched_rq *rq;
>
> /* Add the entity to the run queue */
> - spin_lock(&entity->rq_lock);
> + spin_lock(&entity->lock);
> if (entity->stopped) {
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
>
> DRM_ERROR("Trying to push to a killed
> entity\n");
> return;
> @@ -621,7 +621,7 @@ void drm_sched_entity_push_job(struct
> drm_sched_job *sched_job)
> if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> drm_sched_rq_update_fifo_locked(entity,
> submit_ts);
>
> - spin_unlock(&entity->rq_lock);
> + spin_unlock(&entity->lock);
>
> drm_sched_wakeup(sched);
> }
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 07ee386b8e4b..2670bf9f34b2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct
> drm_sched_entity *entity, ktime_t ts
> * for entity from within concurrent
> drm_sched_entity_select_rq and the
> * other to update the rb tree structure.
> */
> - lockdep_assert_held(&entity->rq_lock);
> + lockdep_assert_held(&entity->lock);
>
> spin_lock(&entity->rq->lock);
>
> diff --git a/include/drm/gpu_scheduler.h
> b/include/drm/gpu_scheduler.h
> index b6d095074c19..683fff8939e4 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -96,6 +96,14 @@ struct drm_sched_entity {
> */
> struct list_head list;
>
Uh, btw, while reviewing, I just saw that we still have that FIXME
further up:
/**
* @rq:
*
* Runqueue on which this entity is currently scheduled.
*
* FIXME: Locking is very unclear for this. Writers are protected by
* @rq_lock, but readers are generally lockless and seem to just race
* with not even a READ_ONCE.
*/
struct drm_sched_rq *rq;
At the very least, rq_lock should be renamed here, too. AFAICS the
series doesn't solve the FIXME, so we keep it, agreed?
> + /**
> + * @lock:
> + *
> + * Lock protecting the run-queue (@rq) to which this entity
> belongs,
> + * @priority and the list of schedulers (@sched_list,
> @num_sched_list).
> + */
> + spinlock_t lock;
> +
> /**
> * @rq:
> *
> @@ -140,13 +148,6 @@ struct drm_sched_entity {
> */
I think this comment here above also uses the term "rq_lock". While
you're fixing it, maybe also do a quick grep for "rq_lock" in case I
overlooked it somewhere else. I stopped drinking coffee today, so...
Thx,
P.
> enum drm_sched_priority priority;
>
> - /**
> - * @rq_lock:
> - *
> - * Lock to modify the runqueue to which this entity belongs.
> - */
> - spinlock_t rq_lock;
> -
> /**
> * @job_queue: the list of jobs of this entity.
> */
More information about the dri-devel
mailing list