[PATCH 6/6] drm/sched: Make use of a "done" thread
Christian König
christian.koenig at amd.com
Wed Nov 25 10:10:40 UTC 2020
Am 25.11.20 um 04:17 schrieb Luben Tuikov:
> Add a "done" list to which all completed jobs are added
> to be freed. The drm_sched_job_done() callback is the
> producer of jobs to this list.
>
> Add a "done" thread which consumes from the done list
> and frees up jobs. Now, the main scheduler thread only
> pushes jobs to the GPU and the "done" thread frees them
> up, on the way out of the GPU when they've completed
> execution.
Well, there are quite a number of problems in this patch.
From a design perspective, I think we should be getting rid of the linked
list rather than extending its use. We also don't want to offload the
freeing of jobs into a different thread, because that could potentially
mean that the freeing is executed on a different CPU.
Then one obvious problem seems to be that you don't take into account
that we moved the job freeing into the scheduler thread to make sure
that job freeing is suspended while the scheduler thread is stopped. This
behavior is now completely gone, i.e. the "done" thread keeps running
while the scheduler thread is stopped.
A few more comments below.
> Make use of the status returned by the GPU driver
> timeout handler to decide whether to leave the job in
> the pending list, or to send it off to the done list.
> If a job is done, it is added to the done list and the
> done thread woken up. If a job needs more time, it is
> left on the pending list and the timeout timer
> restarted.
>
> Eliminate the polling mechanism of picking out done
> jobs from the pending list, i.e. eliminate
> drm_sched_get_cleanup_job(). Now the main scheduler
> thread only pushes jobs down to the GPU.
>
> Various other optimizations to the GPU scheduler
> and job recovery are possible with this format.
>
> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
> ---
> drivers/gpu/drm/scheduler/sched_main.c | 173 +++++++++++++------------
> include/drm/gpu_scheduler.h | 14 ++
> 2 files changed, 101 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 3eb7618a627d..289ae68cd97f 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -164,7 +164,8 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
> * drm_sched_job_done - complete a job
> * @s_job: pointer to the job which is done
> *
> - * Finish the job's fence and wake up the worker thread.
> + * Finish the job's fence, move it to the done list,
> + * and wake up the done thread.
> */
> static void drm_sched_job_done(struct drm_sched_job *s_job)
> {
> @@ -179,7 +180,12 @@ static void drm_sched_job_done(struct drm_sched_job *s_job)
> dma_fence_get(&s_fence->finished);
> drm_sched_fence_finished(s_fence);
> dma_fence_put(&s_fence->finished);
> - wake_up_interruptible(&sched->wake_up_worker);
> +
> + spin_lock(&sched->job_list_lock);
> + list_move(&s_job->list, &sched->done_list);
> + spin_unlock(&sched->job_list_lock);
> +
> + wake_up_interruptible(&sched->done_wait_q);
How is the worker thread then woken up to push new jobs to the hardware?
> }
>
> /**
> @@ -221,11 +227,10 @@ bool drm_sched_dependency_optimized(struct dma_fence* fence,
> EXPORT_SYMBOL(drm_sched_dependency_optimized);
>
> /**
> - * drm_sched_start_timeout - start timeout for reset worker
> - *
> - * @sched: scheduler instance to start the worker for
> + * drm_sched_start_timeout - start a timeout timer
> + * @sched: scheduler instance whose job we're timing
> *
> - * Start the timeout for the given scheduler.
> + * Start a timeout timer for the given scheduler.
> */
> static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
> {
> @@ -305,8 +310,8 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>
> spin_lock(&sched->job_list_lock);
> list_add_tail(&s_job->list, &sched->pending_list);
> - drm_sched_start_timeout(sched);
> spin_unlock(&sched->job_list_lock);
> + drm_sched_start_timeout(sched);
This looks wrong: the drm_sched_start_timeout() function used to need
the lock. Why should that have changed?
> }
>
> static void drm_sched_job_timedout(struct work_struct *work)
> @@ -316,37 +321,30 @@ static void drm_sched_job_timedout(struct work_struct *work)
>
> sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
>
> - /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
> spin_lock(&sched->job_list_lock);
> job = list_first_entry_or_null(&sched->pending_list,
> struct drm_sched_job, list);
> + spin_unlock(&sched->job_list_lock);
>
> if (job) {
> - /*
> - * Remove the bad job so it cannot be freed by concurrent
> - * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread
> - * is parked at which point it's safe.
> - */
> - list_del_init(&job->list);
> - spin_unlock(&sched->job_list_lock);
> + int res;
>
> - job->sched->ops->timedout_job(job);
> + job->job_status |= DRM_JOB_STATUS_TIMEOUT;
> + res = job->sched->ops->timedout_job(job);
> + if (res == 0) {
> + /* The job is out of the device.
> + */
> + spin_lock(&sched->job_list_lock);
> + list_move(&job->list, &sched->done_list);
> + spin_unlock(&sched->job_list_lock);
>
> - /*
> - * Guilty job did complete and hence needs to be manually removed
> - * See drm_sched_stop doc.
> - */
> - if (sched->free_guilty) {
> - job->sched->ops->free_job(job);
> - sched->free_guilty = false;
> + wake_up_interruptible(&sched->done_wait_q);
> + } else {
> + /* The job needs more time.
> + */
> + drm_sched_start_timeout(sched);
> }
> - } else {
> - spin_unlock(&sched->job_list_lock);
> }
> -
> - spin_lock(&sched->job_list_lock);
> - drm_sched_start_timeout(sched);
> - spin_unlock(&sched->job_list_lock);
> }
>
> /**
> @@ -511,15 +509,13 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
> else if (r)
> DRM_ERROR("fence add callback failed (%d)\n",
> r);
> - } else
> + } else {
> drm_sched_job_done(s_job);
> + }
> }
>
> - if (full_recovery) {
> - spin_lock(&sched->job_list_lock);
> + if (full_recovery)
> drm_sched_start_timeout(sched);
> - spin_unlock(&sched->job_list_lock);
Same here: drm_sched_start_timeout() is now called without holding job_list_lock.
Regards,
Christian.
> - }
>
> kthread_unpark(sched->thread);
> }
> @@ -667,47 +663,6 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
> return entity;
> }
>
> -/**
> - * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
> - *
> - * @sched: scheduler instance
> - *
> - * Returns the next finished job from the pending list (if there is one)
> - * ready for it to be destroyed.
> - */
> -static struct drm_sched_job *
> -drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
> -{
> - struct drm_sched_job *job;
> -
> - /*
> - * Don't destroy jobs while the timeout worker is running OR thread
> - * is being parked and hence assumed to not touch pending_list
> - */
> - if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> - !cancel_delayed_work(&sched->work_tdr)) ||
> - kthread_should_park())
> - return NULL;
> -
> - spin_lock(&sched->job_list_lock);
> -
> - job = list_first_entry_or_null(&sched->pending_list,
> - struct drm_sched_job, list);
> -
> - if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
> - /* remove job from pending_list */
> - list_del_init(&job->list);
> - } else {
> - job = NULL;
> - /* queue timeout for next job */
> - drm_sched_start_timeout(sched);
> - }
> -
> - spin_unlock(&sched->job_list_lock);
> -
> - return job;
> -}
> -
> /**
> * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
> * @sched_list: list of drm_gpu_schedulers
> @@ -761,6 +716,44 @@ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
> return false;
> }
>
> +/**
> + * drm_sched_done - free done tasks
> + * @param: pointer to a scheduler instance
> + *
> + * Returns 0.
> + */
> +static int drm_sched_done(void *param)
> +{
> + struct drm_gpu_scheduler *sched = param;
> +
> + do {
> + LIST_HEAD(done_q);
> +
> + wait_event_interruptible(sched->done_wait_q,
> + kthread_should_stop() ||
> + !list_empty(&sched->done_list));
> +
> + spin_lock(&sched->job_list_lock);
> + list_splice_init(&sched->done_list, &done_q);
> + spin_unlock(&sched->job_list_lock);
> +
> + if (list_empty(&done_q))
> + continue;
> +
> + while (!list_empty(&done_q)) {
> + struct drm_sched_job *job;
> +
> + job = list_first_entry(&done_q,
> + struct drm_sched_job,
> + list);
> + list_del_init(&job->list);
> + sched->ops->free_job(job);
> + }
> + } while (!kthread_should_stop());
> +
> + return 0;
> +}
> +
> /**
> * drm_sched_main - main scheduler thread
> *
> @@ -770,7 +763,7 @@ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
> */
> static int drm_sched_main(void *param)
> {
> - struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
> + struct drm_gpu_scheduler *sched = param;
> int r;
>
> sched_set_fifo_low(current);
> @@ -780,20 +773,12 @@ static int drm_sched_main(void *param)
> struct drm_sched_fence *s_fence;
> struct drm_sched_job *sched_job;
> struct dma_fence *fence;
> - struct drm_sched_job *cleanup_job = NULL;
>
> wait_event_interruptible(sched->wake_up_worker,
> - (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
> (!drm_sched_blocked(sched) &&
> (entity = drm_sched_select_entity(sched))) ||
> kthread_should_stop());
>
> - if (cleanup_job) {
> - sched->ops->free_job(cleanup_job);
> - /* queue timeout for next job */
> - drm_sched_start_timeout(sched);
> - }
> -
> if (!entity)
> continue;
>
> @@ -820,8 +805,7 @@ static int drm_sched_main(void *param)
> if (r == -ENOENT)
> drm_sched_job_done(sched_job);
> else if (r)
> - DRM_ERROR("fence add callback failed (%d)\n",
> - r);
> + DRM_ERROR("fence add callback failed (%d)\n", r);
> dma_fence_put(fence);
> } else {
> if (IS_ERR(fence))
> @@ -865,7 +849,9 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>
> init_waitqueue_head(&sched->wake_up_worker);
> init_waitqueue_head(&sched->job_scheduled);
> + init_waitqueue_head(&sched->done_wait_q);
> INIT_LIST_HEAD(&sched->pending_list);
> + INIT_LIST_HEAD(&sched->done_list);
> spin_lock_init(&sched->job_list_lock);
> atomic_set(&sched->hw_rq_count, 0);
> INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> @@ -881,6 +867,21 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> return ret;
> }
>
> + snprintf(sched->thread_done_name, DRM_THREAD_NAME_LEN, "%s%s",
> + sched->name, "-done");
> + sched->thread_done_name[DRM_THREAD_NAME_LEN - 1] = '\0';
> + sched->thread_done = kthread_run(drm_sched_done, sched,
> + sched->thread_done_name);
> + if (IS_ERR(sched->thread_done)) {
> + ret = kthread_stop(sched->thread);
> + if (!ret) {
> + /* free_kthread_struct(sched->thread); */
> + sched->thread = NULL;
> + }
> + DRM_ERROR("Failed to start thread %s", sched->thread_done_name);
> + return ret;
> + }
> +
> sched->ready = true;
> return 0;
> }
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 3a5686c3b5e9..b282d6158b50 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -169,6 +169,12 @@ struct drm_sched_fence {
>
> struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>
> +enum drm_job_status {
> + DRM_JOB_STATUS_NONE = 0 << 0,
> + DRM_JOB_STATUS_DONE = 1 << 0,
> + DRM_JOB_STATUS_TIMEOUT = 1 << 1,
> +};
> +
> /**
> * struct drm_sched_job - A job to be run by an entity.
> *
> @@ -198,6 +204,7 @@ struct drm_sched_job {
> uint64_t id;
> atomic_t karma;
> enum drm_sched_priority s_priority;
> + enum drm_job_status job_status;
> struct drm_sched_entity *entity;
> struct dma_fence_cb cb;
> };
> @@ -284,15 +291,22 @@ struct drm_gpu_scheduler {
> uint32_t hw_submission_limit;
> long timeout;
> const char *name;
> + char thread_done_name[DRM_THREAD_NAME_LEN];
> +
> struct drm_sched_rq sched_rq[DRM_SCHED_PRIORITY_COUNT];
> wait_queue_head_t wake_up_worker;
> wait_queue_head_t job_scheduled;
> + wait_queue_head_t done_wait_q;
> atomic_t hw_rq_count;
> atomic64_t job_id_count;
> struct delayed_work work_tdr;
> struct task_struct *thread;
> + struct task_struct *thread_done;
> +
> struct list_head pending_list;
> + struct list_head done_list;
> spinlock_t job_list_lock;
> +
> int hang_limit;
> atomic_t score;
> bool ready;
More information about the amd-gfx
mailing list