[RFC 10/18] drm/sched: Implement RR via FIFO

Christian König christian.koenig at amd.com
Thu Jan 9 12:59:34 UTC 2025


Am 08.01.25 um 19:35 schrieb Tvrtko Ursulin:
> Round-robin being the non-default policy and unclear how much it is used,
> we can notice that it can be implemented using the FIFO data structures if
> we only invent a fake submit timestamp which is monotonically increasing
> inside drm_sched_rq instances.
>
> So instead of remembering which was the last entity the scheduler worker
> picked, we can bump the picked one to the bottom of the tree, achieving
> the same round-robin behaviour.
>
> Advantage is that we can consolidate to a single code path and remove a
> bunch of code. Downside is round-robin mode now needs to lock on the job
> pop path but that should not be visible.

Oh that's a really nice one. One comment at the end of the patch.

>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Danilo Krummrich <dakr at redhat.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Philipp Stanner <pstanner at redhat.com>
> ---
>   drivers/gpu/drm/scheduler/sched_entity.c | 50 ++++++++------
>   drivers/gpu/drm/scheduler/sched_main.c   | 84 ++----------------------
>   include/drm/gpu_scheduler.h              |  5 +-
>   3 files changed, 39 insertions(+), 100 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 8e910586979e..2b1bc4d00b57 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -473,9 +473,20 @@ drm_sched_job_dependency(struct drm_sched_job *job,
>   	return NULL;
>   }
>   
> +static ktime_t
> +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq)
> +{
> +	lockdep_assert_held(&rq->lock);
> +
> +	rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1);
> +
> +	return rq->rr_deadline;
> +}
> +
>   struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   {
> -	struct drm_sched_job *sched_job;
> +	struct drm_sched_job *sched_job, *next_job;
> +	struct drm_sched_rq *rq;
>   
>   	sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>   	if (!sched_job)
> @@ -510,23 +521,24 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   	 * Update the entity's location in the min heap according to
>   	 * the timestamp of the next job, if any.
>   	 */
> -	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) {
> -		struct drm_sched_job *next;
> -		struct drm_sched_rq *rq;
> +	spin_lock(&entity->lock);
> +	rq = entity->rq;
> +	spin_lock(&rq->lock);
> +	next_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> +	if (next_job) {
> +		ktime_t ts;
>   
> -		spin_lock(&entity->lock);
> -		rq = entity->rq;
> -		spin_lock(&rq->lock);
> -		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> -		if (next) {
> -			drm_sched_rq_update_fifo_locked(entity, rq,
> -							next->submit_ts);
> -		} else {
> -			drm_sched_rq_remove_fifo_locked(entity, rq);
> -		}
> -		spin_unlock(&rq->lock);
> -		spin_unlock(&entity->lock);
> +		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> +			ts = next_job->submit_ts;
> +		else
> +			ts = drm_sched_rq_get_rr_deadline(rq);
> +
> +		drm_sched_rq_update_fifo_locked(entity, rq, ts);
> +	} else {
> +		drm_sched_rq_remove_fifo_locked(entity, rq);
>   	}
> +	spin_unlock(&rq->lock);
> +	spin_unlock(&entity->lock);
>   
>   	/* Jobs and entities might have different lifecycles. Since we're
>   	 * removing the job from the entities queue, set the jobs entity pointer
> @@ -624,9 +636,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
>   
>   		spin_lock(&rq->lock);
>   		drm_sched_rq_add_entity(rq, entity);
> -
> -		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -			drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
> +		if (drm_sched_policy == DRM_SCHED_POLICY_RR)
> +			submit_ts = drm_sched_rq_get_rr_deadline(rq);
> +		drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>   
>   		spin_unlock(&rq->lock);
>   		spin_unlock(&entity->lock);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index bcfc2abe349a..31cab7bb5428 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -189,7 +189,6 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
>   	spin_lock_init(&rq->lock);
>   	INIT_LIST_HEAD(&rq->entities);
>   	rq->rb_tree_root = RB_ROOT_CACHED;
> -	rq->current_entity = NULL;
>   	rq->sched = sched;
>   }
>   
> @@ -235,82 +234,13 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   	atomic_dec(rq->sched->score);
>   	list_del_init(&entity->list);
>   
> -	if (rq->current_entity == entity)
> -		rq->current_entity = NULL;
> -
> -	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -		drm_sched_rq_remove_fifo_locked(entity, rq);
> +	drm_sched_rq_remove_fifo_locked(entity, rq);
>   
>   	spin_unlock(&rq->lock);
>   }
>   
>   /**
> - * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
> - *
> - * @sched: the gpu scheduler
> - * @rq: scheduler run queue to check.
> - *
> - * Try to find the next ready entity.
> - *
> - * Return an entity if one is found; return an error-pointer (!NULL) if an
> - * entity was ready, but the scheduler had insufficient credits to accommodate
> - * its job; return NULL, if no ready entity was found.
> - */
> -static struct drm_sched_entity *
> -drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
> -			      struct drm_sched_rq *rq)
> -{
> -	struct drm_sched_entity *entity;
> -
> -	spin_lock(&rq->lock);
> -
> -	entity = rq->current_entity;
> -	if (entity) {
> -		list_for_each_entry_continue(entity, &rq->entities, list) {
> -			if (drm_sched_entity_is_ready(entity)) {
> -				/* If we can't queue yet, preserve the current
> -				 * entity in terms of fairness.
> -				 */
> -				if (!drm_sched_can_queue(sched, entity)) {
> -					spin_unlock(&rq->lock);
> -					return ERR_PTR(-ENOSPC);
> -				}
> -
> -				rq->current_entity = entity;
> -				reinit_completion(&entity->entity_idle);
> -				spin_unlock(&rq->lock);
> -				return entity;
> -			}
> -		}
> -	}
> -
> -	list_for_each_entry(entity, &rq->entities, list) {
> -		if (drm_sched_entity_is_ready(entity)) {
> -			/* If we can't queue yet, preserve the current entity in
> -			 * terms of fairness.
> -			 */
> -			if (!drm_sched_can_queue(sched, entity)) {
> -				spin_unlock(&rq->lock);
> -				return ERR_PTR(-ENOSPC);
> -			}
> -
> -			rq->current_entity = entity;
> -			reinit_completion(&entity->entity_idle);
> -			spin_unlock(&rq->lock);
> -			return entity;
> -		}
> -
> -		if (entity == rq->current_entity)
> -			break;
> -	}
> -
> -	spin_unlock(&rq->lock);
> -
> -	return NULL;
> -}
> -
> -/**
> - * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
> + * drm_sched_rq_select_entity - Select an entity which provides a job to run
>    *
>    * @sched: the gpu scheduler
>    * @rq: scheduler run queue to check.
> @@ -322,8 +252,8 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
>    * its job; return NULL, if no ready entity was found.
>    */
>   static struct drm_sched_entity *
> -drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
> -				struct drm_sched_rq *rq)
> +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched,
> +			   struct drm_sched_rq *rq)
>   {
>   	struct drm_sched_entity *entity = NULL;
>   	struct rb_node *rb;
> @@ -1055,15 +985,13 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
>   static struct drm_sched_entity *
>   drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>   {
> -	struct drm_sched_entity *entity;
> +	struct drm_sched_entity *entity = NULL;
>   	int i;
>   
>   	/* Start with the highest priority.
>   	 */
>   	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
> -		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
> -			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
> -			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
> +		entity = drm_sched_rq_select_entity(sched, sched->sched_rq[i]);
>   		if (entity)
>   			break;
>   	}
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 005db1e35fad..a0164de08f5b 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -245,8 +245,7 @@ struct drm_sched_entity {
>    * struct drm_sched_rq - queue of entities to be scheduled.
>    *
>    * @sched: the scheduler to which this rq belongs to.
> - * @lock: protects @entities, @rb_tree_root and @current_entity.
> - * @current_entity: the entity which is to be scheduled.
> + * @lock: protects @entities, @rb_tree_root and @rr_deadline.
>    * @entities: list of the entities to be scheduled.
>    * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
>    *
> @@ -259,7 +258,7 @@ struct drm_sched_rq {
>   
>   	spinlock_t			lock;
>   	/* Following members are protected by the @lock: */
> -	struct drm_sched_entity		*current_entity;
> +	ktime_t				rr_deadline;
>   	struct list_head		entities;

If I'm not completely mistaken you can now also nuke this entities list 
if you haven't already removed all users.

Regards,
Christian.

>   	struct rb_root_cached		rb_tree_root;
>   };



More information about the dri-devel mailing list