[PATCH v2] drm/scheduler: remove timeout work_struct from drm_sched_job
Nayan Deshmukh
nayan26deshmukh at gmail.com
Wed Sep 26 15:55:18 UTC 2018
Hi Christian,
On Wed, Sep 26, 2018, 10:13 AM Christian König <ckoenig.leichtzumerken at gmail.com> wrote:
> On 26.09.2018 at 09:39, Lucas Stach wrote:
> > Hi Nayan,
> >
> > On Wednesday, 26.09.2018 at 02:09 +0900, Nayan Deshmukh wrote:
> >> Having a delayed work item per job is redundant, as we only need one
> >> per scheduler to track the timeout of the currently executing job.
> >>
> >> v2: the first element of the ring mirror list is the currently
> >> executing job so we don't need an additional variable for it
> >>
> >> Signed-off-by: Nayan Deshmukh <nayan26deshmukh at gmail.com>
> >> Suggested-by: Christian König <christian.koenig at amd.com>
> >> ---
> >> drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++---------------
> >> include/drm/gpu_scheduler.h            |  6 +++---
> >> 2 files changed, 19 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> >> index 9ca741f3a0bc..4e8505d51795 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)
> >> * manages to find this job as the next job in the list, the fence
> >> * signaled check below will prevent the timeout from being restarted.
> >> */
> >> - cancel_delayed_work_sync(&s_job->work_tdr);
> >> + cancel_delayed_work_sync(&sched->work_tdr);
> >>
> >> spin_lock(&sched->job_list_lock);
> >> - /* queue TDR for next job */
> >> - if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> >> - !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> >> - struct drm_sched_job *next = list_next_entry(s_job, node);
> >> -
> >> - if (!dma_fence_is_signaled(&next->s_fence->finished))
> >> - schedule_delayed_work(&next->work_tdr, sched->timeout);
> >> - }
> >> /* remove job from ring_mirror_list */
> >> list_del(&s_job->node);
> >> + /* queue TDR for next job */
> >> + if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> >> + !list_empty(&sched->ring_mirror_list))
> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout);
> >> spin_unlock(&sched->job_list_lock);
> >>
> >> dma_fence_put(&s_job->s_fence->finished);
> >> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
> >> if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> >> list_first_entry_or_null(&sched->ring_mirror_list,
> >> struct drm_sched_job, node) == s_job)
> >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout);
> >> spin_unlock(&sched->job_list_lock);
> >> }
> >>
> >> static void drm_sched_job_timedout(struct work_struct *work)
> >> {
> >> - struct drm_sched_job *job = container_of(work, struct drm_sched_job,
> >> - work_tdr.work);
> >> + struct drm_gpu_scheduler *sched;
> >> + struct drm_sched_job *job;
> >> +
> >> + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
> >> + job = list_first_entry_or_null(&sched->ring_mirror_list,
> >> + struct drm_sched_job, node);
> >>
> >> - job->sched->ops->timedout_job(job);
> >> + if (job)
> >> + job->sched->ops->timedout_job(job);
> > I don't think this is fully robust. Jobs are only removed from the
> > ring_mirror_list once the job_finish worker has run. If execution of
> > this worker is delayed for any reason (though a delay as long as the
> > job timeout is really unlikely) you are blaming the wrong job here.
> >
> > So I think what you need to do is find the first job in the ring
> > mirror list with an unsignaled finish fence to robustly identify the
> > stuck job.
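> >
> > I.e. something along these lines (untested sketch; ring_mirror_list
> > is protected by job_list_lock):
> >
> >     struct drm_sched_job *job, *bad = NULL;
> >
> >     spin_lock(&sched->job_list_lock);
> >     list_for_each_entry(job, &sched->ring_mirror_list, node) {
> >         /* The first job whose finished fence hasn't signaled yet
> >          * is the one that is actually stuck on the hardware.
> >          */
> >         if (!dma_fence_is_signaled(&job->s_fence->finished)) {
> >             bad = job;
> >             break;
> >         }
> >     }
> >     spin_unlock(&sched->job_list_lock);
> >
> >     if (bad)
> >         bad->sched->ops->timedout_job(bad);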
>
> Yeah, that is a known problem I've pointed out as well.
>
> The issue is that we have bug reports of this happening even before the
> patch, but I'm not 100% sure how.
>
> My suggestion is to move a good part of the logic from
> drm_sched_hw_job_reset() and drm_sched_job_recovery() into
> drm_sched_job_timedout().
>
> E.g. we first call dma_fence_remove_callback() for each job and check
> the return value to see whether the fence was already signaled.
>
> If we find a signaled fence we abort and add the callbacks back to the
> jobs we removed them from.
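>
> Something like this maybe (very rough and untested, reusing the parent
> fence / cb bookkeeping that drm_sched_hw_job_reset() and
> drm_sched_job_recovery() already use):
>
>     struct drm_sched_job *s_job;
>
>     list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
>         struct drm_sched_fence *s_fence = s_job->s_fence;
>
>         if (s_fence->parent &&
>             !dma_fence_remove_callback(s_fence->parent, &s_fence->cb)) {
>             /* The hardware fence already signaled, so nothing is
>              * actually hung. Re-add the callbacks we removed so
>              * far and abort the timeout handling.
>              */
>             list_for_each_entry_continue_reverse(s_job,
>                     &sched->ring_mirror_list, node)
>                 if (s_job->s_fence->parent)
>                     dma_fence_add_callback(s_job->s_fence->parent,
>                                            &s_job->s_fence->cb,
>                                            drm_sched_process_job);
>             return;
>         }
>     }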
>
> Nayan do you want to take care of this or should I take a look?
>
I can take care of it.
Regards,
Nayan
>
> Regards,
> Christian.
>
> >
> > Regards,
> > Lucas
> >
> >> }
> >>
> >> /**
> >> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
> >> s_job = list_first_entry_or_null(&sched->ring_mirror_list,
> >> struct drm_sched_job, node);
> >> if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout);
> >>
> >> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
> >> struct drm_sched_fence *s_fence = s_job->s_fence;
> >> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
> >>
> >> INIT_WORK(&job->finish_work, drm_sched_job_finish);
> >> INIT_LIST_HEAD(&job->node);
> >> - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
> >>
> >> return 0;
> >> }
> >> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >> INIT_LIST_HEAD(&sched->ring_mirror_list);
> >> spin_lock_init(&sched->job_list_lock);
> >> atomic_set(&sched->hw_rq_count, 0);
> >> + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> >> atomic_set(&sched->num_jobs, 0);
> >> atomic64_set(&sched->job_id_count, 0);
> >>
> >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> >> index daec50f887b3..d87b268f1781 100644
> >> --- a/include/drm/gpu_scheduler.h
> >> +++ b/include/drm/gpu_scheduler.h
> >> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
> >> * finished to remove the job from the
> >> * @drm_gpu_scheduler.ring_mirror_list.
> >> * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
> >> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
> >> - * interval is over.
> >> * @id: a unique id assigned to each job scheduled on the scheduler.
> >> * @karma: increment on every hang caused by this job. If this exceeds the hang
> >> * limit of the scheduler then the job is marked guilty and will not
> >> @@ -195,7 +193,6 @@ struct drm_sched_job {
> >> struct dma_fence_cb finish_cb;
> >> struct work_struct finish_work;
> >> struct list_head node;
> >> - struct delayed_work work_tdr;
> >> uint64_t id;
> >> atomic_t karma;
> >> enum drm_sched_priority s_priority;
> >> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
> >> * finished.
> >> * @hw_rq_count: the number of jobs currently in the hardware queue.
> >> * @job_id_count: used to assign a unique id to each job.
> >> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> >> + * timeout interval is over.
> >> * @thread: the kthread on which the scheduler runs.
> >> * @ring_mirror_list: the list of jobs which are currently in the job queue.
> >> * @job_list_lock: lock to protect the ring_mirror_list.
> >> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
> >> wait_queue_head_t job_scheduled;
> >> atomic_t hw_rq_count;
> >> atomic64_t job_id_count;
> >> + struct delayed_work work_tdr;
> >> struct task_struct *thread;
> >> struct list_head ring_mirror_list;
> >> spinlock_t job_list_lock;