[PATCH 4/8] drm/sched: Add generic scheduler message interface

Tue Aug 8 14:14:55 UTC 2023

Am 08.08.23 um 16:06 schrieb Matthew Brost:
> [SNIP]
>> Basically workqueues are the in kernel infrastructure for exactly that use
>> case and we are trying to re-create that here and that is usually a rather
>> bad idea.
>>
> Ok let me play around with what this would look like in Xe, what you are
> suggesting would be ordered-wq per scheduler, work item for run job,
> work item for clean up job, and work item for a message. That might
> work I suppose? Only issue I see is scaling as this exposes an
> ordered-wq creation directly to an IOCTL. No idea if that is actually a
> concern though.

That's a very good question that I can't answer offhand either.

But from the history of work queues I know that they were invented to 
reduce the overhead/costs of having many kernel threads.

So my educated guess is that you probably won't find anything better at 
the moment. If work queues then indeed don't match this use case then we 
need to figure out how to improve them or find a different solution.

Christian.

>
> Matt
>
>> Regards,
>> Christian.
>>
>>> Matt
>>>
>>>> Or what am I missing?
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> Worst case I think this isn't a dead-end and can be refactored to
>>>>>> internally use the workqueue services, with the new functions here
>>>>>> just being dumb wrappers until everyone is converted over. So it
>>>>>> doesn't look like an expensive mistake, if it turns out to be a
>>>>>> mistake.
>>>>>> -Daniel
>>>>>>
>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>>>>>>>> ---
>>>>>>>>      drivers/gpu/drm/scheduler/sched_main.c | 52 +++++++++++++++++++++++++-
>>>>>>>>      include/drm/gpu_scheduler.h            | 29 +++++++++++++-
>>>>>>>>      2 files changed, 78 insertions(+), 3 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>> index 2597fb298733..84821a124ca2 100644
>>>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>> @@ -1049,6 +1049,49 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
>>>>>>>>      }
>>>>>>>>      EXPORT_SYMBOL(drm_sched_pick_best);
>>>>>>>>
>>>>>>>> +/**
>>>>>>>> + * drm_sched_add_msg - add scheduler message
>>>>>>>> + *
>>>>>>>> + * @sched: scheduler instance
>>>>>>>> + * @msg: message to be added
>>>>>>>> + *
>>>>>>>> + * Can and will pass any jobs waiting on dependencies or in a runnable queue.
>>>>>>>> + * Message processing will stop if the scheduler run wq is stopped and resume
>>>>>>>> + * when the run wq is started.
>>>>>>>> + */
>>>>>>>> +void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
>>>>>>>> +                    struct drm_sched_msg *msg)
>>>>>>>> +{
>>>>>>>> +     spin_lock(&sched->job_list_lock);
>>>>>>>> +     list_add_tail(&msg->link, &sched->msgs);
>>>>>>>> +     spin_unlock(&sched->job_list_lock);
>>>>>>>> +
>>>>>>>> +     drm_sched_run_wq_queue(sched);
>>>>>>>> +}
>>>>>>>> +EXPORT_SYMBOL(drm_sched_add_msg);
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>> + * drm_sched_get_msg - get scheduler message
>>>>>>>> + *
>>>>>>>> + * @sched: scheduler instance
>>>>>>>> + *
>>>>>>>> + * Returns NULL or message
>>>>>>>> + */
>>>>>>>> +static struct drm_sched_msg *
>>>>>>>> +drm_sched_get_msg(struct drm_gpu_scheduler *sched)
>>>>>>>> +{
>>>>>>>> +     struct drm_sched_msg *msg;
>>>>>>>> +
>>>>>>>> +     spin_lock(&sched->job_list_lock);
>>>>>>>> +     msg = list_first_entry_or_null(&sched->msgs,
>>>>>>>> +                                    struct drm_sched_msg, link);
>>>>>>>> +     if (msg)
>>>>>>>> +             list_del(&msg->link);
>>>>>>>> +     spin_unlock(&sched->job_list_lock);
>>>>>>>> +
>>>>>>>> +     return msg;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>      /**
>>>>>>>>       * drm_sched_main - main scheduler thread
>>>>>>>>       *
>>>>>>>> @@ -1060,6 +1103,7 @@ static void drm_sched_main(struct work_struct *w)
>>>>>>>>                  container_of(w, struct drm_gpu_scheduler, work_run);
>>>>>>>>          struct drm_sched_entity *entity;
>>>>>>>>          struct drm_sched_job *cleanup_job;
>>>>>>>> +     struct drm_sched_msg *msg;
>>>>>>>>          int r;
>>>>>>>>
>>>>>>>>          if (READ_ONCE(sched->pause_run_wq))
>>>>>>>> @@ -1067,12 +1111,15 @@ static void drm_sched_main(struct work_struct *w)
>>>>>>>>
>>>>>>>>          cleanup_job = drm_sched_get_cleanup_job(sched);
>>>>>>>>          entity = drm_sched_select_entity(sched);
>>>>>>>> +     msg = drm_sched_get_msg(sched);
>>>>>>>>
>>>>>>>> -     if (!entity && !cleanup_job)
>>>>>>>> +     if (!entity && !cleanup_job && !msg)
>>>>>>>>                  return; /* No more work */
>>>>>>>>
>>>>>>>>          if (cleanup_job)
>>>>>>>>                  sched->ops->free_job(cleanup_job);
>>>>>>>> +     if (msg)
>>>>>>>> +             sched->ops->process_msg(msg);
>>>>>>>>
>>>>>>>>          if (entity) {
>>>>>>>>                  struct dma_fence *fence;
>>>>>>>> @@ -1082,7 +1129,7 @@ static void drm_sched_main(struct work_struct *w)
>>>>>>>>                  sched_job = drm_sched_entity_pop_job(entity);
>>>>>>>>                  if (!sched_job) {
>>>>>>>>                          complete_all(&entity->entity_idle);
>>>>>>>> -                     if (!cleanup_job)
>>>>>>>> +                     if (!cleanup_job && !msg)
>>>>>>>>                                  return; /* No more work */
>>>>>>>>                          goto again;
>>>>>>>>                  }
>>>>>>>> @@ -1177,6 +1224,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>>>>>>>
>>>>>>>>          init_waitqueue_head(&sched->job_scheduled);
>>>>>>>>          INIT_LIST_HEAD(&sched->pending_list);
>>>>>>>> +     INIT_LIST_HEAD(&sched->msgs);
>>>>>>>>          spin_lock_init(&sched->job_list_lock);
>>>>>>>>          atomic_set(&sched->hw_rq_count, 0);
>>>>>>>>          INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>>>>>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>>>>>>> index df1993dd44ae..267bd060d178 100644
>>>>>>>> --- a/include/drm/gpu_scheduler.h
>>>>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>>>>> @@ -394,6 +394,23 @@ enum drm_gpu_sched_stat {
>>>>>>>>          DRM_GPU_SCHED_STAT_ENODEV,
>>>>>>>>      };
>>>>>>>>
>>>>>>>> +/**
>>>>>>>> + * struct drm_sched_msg - an in-band (relative to GPU scheduler run queue)
>>>>>>>> + * message
>>>>>>>> + *
>>>>>>>> + * Generic enough for backend defined messages, backend can expand if needed.
>>>>>>>> + */
>>>>>>>> +struct drm_sched_msg {
>>>>>>>> +     /** @link: list link into the gpu scheduler list of messages */
>>>>>>>> +     struct list_head                link;
>>>>>>>> +     /**
>>>>>>>> +      * @private_data: opaque pointer to message private data (backend defined)
>>>>>>>> +      */
>>>>>>>> +     void                            *private_data;
>>>>>>>> +     /** @opcode: opcode of message (backend defined) */
>>>>>>>> +     unsigned int                    opcode;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>>      /**
>>>>>>>>       * struct drm_sched_backend_ops - Define the backend operations
>>>>>>>>       *  called by the scheduler
>>>>>>>> @@ -471,6 +488,12 @@ struct drm_sched_backend_ops {
>>>>>>>>               * and it's time to clean it up.
>>>>>>>>           */
>>>>>>>>          void (*free_job)(struct drm_sched_job *sched_job);
>>>>>>>> +
>>>>>>>> +     /**
>>>>>>>> +      * @process_msg: Process a message. Allowed to block; it is this
>>>>>>>> +      * function's responsibility to free the message if dynamically allocated.
>>>>>>>> +      */
>>>>>>>> +     void (*process_msg)(struct drm_sched_msg *msg);
>>>>>>>>      };
>>>>>>>>
>>>>>>>>      /**
>>>>>>>> @@ -482,6 +505,7 @@ struct drm_sched_backend_ops {
>>>>>>>>       * @timeout: the time after which a job is removed from the scheduler.
>>>>>>>>       * @name: name of the ring for which this scheduler is being used.
>>>>>>>>       * @sched_rq: priority wise array of run queues.
>>>>>>>> + * @msgs: list of messages to be processed in @work_run
>>>>>>>>       * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
>>>>>>>>       *                 waits on this wait queue until all the scheduled jobs are
>>>>>>>>       *                 finished.
>>>>>>>> @@ -489,7 +513,7 @@ struct drm_sched_backend_ops {
>>>>>>>>       * @job_id_count: used to assign unique id to the each job.
>>>>>>>>       * @run_wq: workqueue used to queue @work_run
>>>>>>>>       * @timeout_wq: workqueue used to queue @work_tdr
>>>>>>>> - * @work_run: schedules jobs and cleans up entities
>>>>>>>> + * @work_run: schedules jobs, cleans up jobs, and processes messages
>>>>>>>>       * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
>>>>>>>>       *            timeout interval is over.
>>>>>>>>       * @pending_list: the list of jobs which are currently in the job queue.
>>>>>>>> @@ -513,6 +537,7 @@ struct drm_gpu_scheduler {
>>>>>>>>          long                            timeout;
>>>>>>>>          const char                      *name;
>>>>>>>>          struct drm_sched_rq             sched_rq[DRM_SCHED_PRIORITY_COUNT];
>>>>>>>> +     struct list_head                msgs;
>>>>>>>>          wait_queue_head_t               job_scheduled;
>>>>>>>>          atomic_t                        hw_rq_count;
>>>>>>>>          atomic64_t                      job_id_count;
>>>>>>>> @@ -566,6 +591,8 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>>>>>>>
>>>>>>>>      void drm_sched_job_cleanup(struct drm_sched_job *job);
>>>>>>>>      void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>>>>>>> +void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
>>>>>>>> +                    struct drm_sched_msg *msg);
>>>>>>>>      void drm_sched_run_wq_stop(struct drm_gpu_scheduler *sched);
>>>>>>>>      void drm_sched_run_wq_start(struct drm_gpu_scheduler *sched);
>>>>>>>>      void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
>>>> -- 
>>>> Daniel Vetter
>>>> Software Engineer, Intel Corporation
>>>> http://blog.ffwll.ch