[RFC PATCH 04/10] drm/sched: Add generic scheduler message interface
Matthew Brost
matthew.brost at intel.com
Mon Jul 31 02:42:16 UTC 2023
On Thu, May 04, 2023 at 01:28:52AM -0400, Luben Tuikov wrote:
> On 2023-04-03 20:22, Matthew Brost wrote:
> > Add generic schedule message interface which sends messages to backend
> > from the drm_gpu_scheduler main submission thread. The idea is some of
> > these messages modify some state in drm_sched_entity which is also
> > modified during submission. By scheduling these messages and submission
> > in the same thread their is not race changing states in
> > drm_sched_entity.
>
> "... there is no race when changing ..." or better yet,
> "... we eliminate races due to drm_sched_entity state changes."
>
> >
> > This interface will be used in XE, new Intel GPU driver, to cleanup,
>
> "Xe"?
>
Will fix both.
Matt
> Regards,
> Luben
>
> > suspend, resume, and change scheduling properties of a drm_sched_entity.
> >
> > The interface is designed to be generic and extendable with only the
> > backend understanding the messages.
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/scheduler/sched_main.c | 58 +++++++++++++++++++++++++-
> > include/drm/gpu_scheduler.h | 29 ++++++++++++-
> > 2 files changed, 84 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > index 2795021efe7b..9dc3378e9c5e 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -1055,6 +1055,54 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
> > }
> > EXPORT_SYMBOL(drm_sched_pick_best);
> >
> > +/**
> > + * drm_sched_add_msg - add scheduler message
> > + *
> > + * @sched: scheduler instance
> > + * @msg: message to be added
> > + *
> > + * Can and will pass any jobs waiting on dependencies or in a runnable queue.
> > + * Message processing will stop if the scheduler run wq is stopped and resume
> > + * when the run wq is started.
> > + */
> > +void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
> > + struct drm_sched_msg *msg)
> > +{
> > + spin_lock(&sched->job_list_lock);
> > + list_add_tail(&msg->link, &sched->msgs);
> > + spin_unlock(&sched->job_list_lock);
> > +
> > + /*
> > + * Same as above in drm_sched_run_wq_queue: try to kick the worker unless
> > + * the run wq is paused; harmless if this races.
> > + */
> > + if (!sched->pause_run_wq)
> > + queue_work(sched->run_wq, &sched->work_run);
> > +}
> > +EXPORT_SYMBOL(drm_sched_add_msg);
> > +
> > +/**
> > + * drm_sched_get_msg - get scheduler message
> > + *
> > + * @sched: scheduler instance
> > + *
> > + * Returns NULL or message
> > + */
> > +static struct drm_sched_msg *
> > +drm_sched_get_msg(struct drm_gpu_scheduler *sched)
> > +{
> > + struct drm_sched_msg *msg;
> > +
> > + spin_lock(&sched->job_list_lock);
> > + msg = list_first_entry_or_null(&sched->msgs,
> > + struct drm_sched_msg, link);
> > + if (msg)
> > + list_del(&msg->link);
> > + spin_unlock(&sched->job_list_lock);
> > +
> > + return msg;
> > +}
> > +
> > /**
> > * drm_sched_main - main scheduler thread
> > *
> > @@ -1068,6 +1116,7 @@ static void drm_sched_main(struct work_struct *w)
> >
> > while (!READ_ONCE(sched->pause_run_wq)) {
> > struct drm_sched_entity *entity;
> > + struct drm_sched_msg *msg;
> > struct drm_sched_fence *s_fence;
> > struct drm_sched_job *sched_job;
> > struct dma_fence *fence;
> > @@ -1075,12 +1124,16 @@ static void drm_sched_main(struct work_struct *w)
> >
> > cleanup_job = drm_sched_get_cleanup_job(sched);
> > entity = drm_sched_select_entity(sched);
> > + msg = drm_sched_get_msg(sched);
> >
> > if (cleanup_job)
> > sched->ops->free_job(cleanup_job);
> >
> > + if (msg)
> > + sched->ops->process_msg(msg);
> > +
> > if (!entity) {
> > - if (!cleanup_job)
> > + if (!cleanup_job && !msg)
> > break;
> > continue;
> > }
> > @@ -1089,7 +1142,7 @@ static void drm_sched_main(struct work_struct *w)
> >
> > if (!sched_job) {
> > complete_all(&entity->entity_idle);
> > - if (!cleanup_job)
> > + if (!cleanup_job && !msg)
> > break;
> > continue;
> > }
> > @@ -1181,6 +1234,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >
> > init_waitqueue_head(&sched->job_scheduled);
> > INIT_LIST_HEAD(&sched->pending_list);
> > + INIT_LIST_HEAD(&sched->msgs);
> > spin_lock_init(&sched->job_list_lock);
> > atomic_set(&sched->hw_rq_count, 0);
> > INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 3e421f5a710c..18172ae63ab7 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -398,6 +398,23 @@ enum drm_gpu_sched_stat {
> > DRM_GPU_SCHED_STAT_ENODEV,
> > };
> >
> > +/**
> > + * struct drm_sched_msg - an in-band (relative to GPU scheduler run queue)
> > + * message
> > + *
> > + * Generic enough for backend defined messages, backend can expand if needed.
> > + */
> > +struct drm_sched_msg {
> > + /** @link: list link into the gpu scheduler list of messages */
> > + struct list_head link;
> > + /**
> > + * @private_data: opaque pointer to message private data (backend defined)
> > + */
> > + void *private_data;
> > + /** @opcode: opcode of message (backend defined) */
> > + unsigned int opcode;
> > +};
> > +
> > /**
> > * struct drm_sched_backend_ops - Define the backend operations
> > * called by the scheduler
> > @@ -475,6 +492,12 @@ struct drm_sched_backend_ops {
> > * and it's time to clean it up.
> > */
> > void (*free_job)(struct drm_sched_job *sched_job);
> > +
> > + /**
> > + * @process_msg: Process a message. Allowed to block. It is this
> > + * function's responsibility to free the message if dynamically allocated.
> > + */
> > + void (*process_msg)(struct drm_sched_msg *msg);
> > };
> >
> > /**
> > @@ -486,6 +509,7 @@ struct drm_sched_backend_ops {
> > * @timeout: the time after which a job is removed from the scheduler.
> > * @name: name of the ring for which this scheduler is being used.
> > * @sched_rq: priority wise array of run queues.
> > + * @msgs: list of messages to be processed in @work_run
> > * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
> > * waits on this wait queue until all the scheduled jobs are
> > * finished.
> > @@ -493,7 +517,7 @@ struct drm_sched_backend_ops {
> > * @job_id_count: used to assign unique id to the each job.
> > * @run_wq: workqueue used to queue @work_run
> > * @timeout_wq: workqueue used to queue @work_tdr
> > - * @work_run: schedules jobs and cleans up entities
> > + * @work_run: schedules jobs, cleans up jobs, and processes messages
> > * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> > * timeout interval is over.
> > * @pending_list: the list of jobs which are currently in the job queue.
> > @@ -517,6 +541,7 @@ struct drm_gpu_scheduler {
> > long timeout;
> > const char *name;
> > struct drm_sched_rq sched_rq[DRM_SCHED_PRIORITY_COUNT];
> > + struct list_head msgs;
> > wait_queue_head_t job_scheduled;
> > atomic_t hw_rq_count;
> > atomic64_t job_id_count;
> > @@ -570,6 +595,8 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
> >
> > void drm_sched_job_cleanup(struct drm_sched_job *job);
> > void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
> > +void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
> > + struct drm_sched_msg *msg);
> > void drm_sched_run_wq_stop(struct drm_gpu_scheduler *sched);
> > void drm_sched_run_wq_start(struct drm_gpu_scheduler *sched);
> > void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
>
More information about the dri-devel
mailing list