[PATCH 1/8] drm/scheduler: properly forward fence errors

Christian König ckoenig.leichtzumerken at gmail.com
Mon Apr 24 10:06:58 UTC 2023


I've pushed the scheduler patch to drm-misc-next and the whole set to 
amd-staging-drm-next.

Christian.

Am 21.04.23 um 15:40 schrieb Deucher, Alexander:
>
> [AMD Official Use Only - General]
>
>
> Sure.  We can pull it into amd-staging-drm-next as well if we need it 
> for any customers in the short term.
>
> Alex
> ------------------------------------------------------------------------
> *From:* Christian König <ckoenig.leichtzumerken at gmail.com>
> *Sent:* Friday, April 21, 2023 9:27 AM
> *To:* amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>; 
> Deucher, Alexander <Alexander.Deucher at amd.com>
> *Cc:* Tuikov, Luben <Luben.Tuikov at amd.com>
> *Subject:* Re: [PATCH 1/8] drm/scheduler: properly forward fence errors
> Alex, can I merge that through drm-misc-next, or do we really need
> amd-staging-drm-next?
>
> Christian.
>
> Am 21.04.23 um 07:22 schrieb Luben Tuikov:
> > Hi Christian,
> >
> > Thanks for working on this.
> >
> > Series is,
> > Reviewed-by: Luben Tuikov <luben.tuikov at amd.com>
> >
> > Regards,
> > Luben
> >
> > On 2023-04-20 07:57, Christian König wrote:
> >> When a hw fence is signaled with an error, properly forward that
> >> error to the finished fence.
> >>
> >> Signed-off-by: Christian König <christian.koenig at amd.com>
> >> ---
> >>   drivers/gpu/drm/scheduler/sched_entity.c |  4 +---
> >>   drivers/gpu/drm/scheduler/sched_fence.c  |  4 +++-
> >>   drivers/gpu/drm/scheduler/sched_main.c   | 18 ++++++++----------
> >>   include/drm/gpu_scheduler.h              |  2 +-
> >>   4 files changed, 13 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
> b/drivers/gpu/drm/scheduler/sched_entity.c
> >> index 15d04a0ec623..eaf71fe15ed3 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> >> @@ -144,7 +144,7 @@ static void 
> drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
> >>   {
> >>       struct drm_sched_job *job = container_of(wrk, typeof(*job), 
> work);
> >>
> >> -    drm_sched_fence_finished(job->s_fence);
> >> +    drm_sched_fence_finished(job->s_fence, -ESRCH);
> >>       WARN_ON(job->s_fence->parent);
> >>       job->sched->ops->free_job(job);
> >>   }
> >> @@ -195,8 +195,6 @@ static void drm_sched_entity_kill(struct 
> drm_sched_entity *entity)
> >>       while ((job = 
> to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
> >>               struct drm_sched_fence *s_fence = job->s_fence;
> >>
> >> - dma_fence_set_error(&s_fence->finished, -ESRCH);
> >> -
> >> dma_fence_get(&s_fence->finished);
> >>               if (!prev || dma_fence_add_callback(prev, 
> &job->finish_cb,
> >> drm_sched_entity_kill_jobs_cb))
> >> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c 
> b/drivers/gpu/drm/scheduler/sched_fence.c
> >> index 7fd869520ef2..1a6bea98c5cc 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> >> @@ -53,8 +53,10 @@ void drm_sched_fence_scheduled(struct 
> drm_sched_fence *fence)
> >> dma_fence_signal(&fence->scheduled);
> >>   }
> >>
> >> -void drm_sched_fence_finished(struct drm_sched_fence *fence)
> >> +void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
> >>   {
> >> +    if (result)
> >> + dma_fence_set_error(&fence->finished, result);
> >> dma_fence_signal(&fence->finished);
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> b/drivers/gpu/drm/scheduler/sched_main.c
> >> index fcd4bfef7415..649fac2e1ccb 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >> @@ -257,7 +257,7 @@ drm_sched_rq_select_entity_fifo(struct 
> drm_sched_rq *rq)
> >>    *
> >>    * Finish the job's fence and wake up the worker thread.
> >>    */
> >> -static void drm_sched_job_done(struct drm_sched_job *s_job)
> >> +static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
> >>   {
> >>       struct drm_sched_fence *s_fence = s_job->s_fence;
> >>       struct drm_gpu_scheduler *sched = s_fence->sched;
> >> @@ -268,7 +268,7 @@ static void drm_sched_job_done(struct 
> drm_sched_job *s_job)
> >>       trace_drm_sched_process_job(s_fence);
> >>
> >>       dma_fence_get(&s_fence->finished);
> >> -    drm_sched_fence_finished(s_fence);
> >> +    drm_sched_fence_finished(s_fence, result);
> >>       dma_fence_put(&s_fence->finished);
> >> wake_up_interruptible(&sched->wake_up_worker);
> >>   }
> >> @@ -282,7 +282,7 @@ static void drm_sched_job_done_cb(struct 
> dma_fence *f, struct dma_fence_cb *cb)
> >>   {
> >>       struct drm_sched_job *s_job = container_of(cb, struct 
> drm_sched_job, cb);
> >>
> >> -    drm_sched_job_done(s_job);
> >> +    drm_sched_job_done(s_job, f->error);
> >>   }
> >>
> >>   /**
> >> @@ -533,12 +533,12 @@ void drm_sched_start(struct drm_gpu_scheduler 
> *sched, bool full_recovery)
> >>                       r = dma_fence_add_callback(fence, &s_job->cb,
> >>                                                 drm_sched_job_done_cb);
> >>                       if (r == -ENOENT)
> >> - drm_sched_job_done(s_job);
> >> + drm_sched_job_done(s_job, fence->error);
> >>                       else if (r)
> >> DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> >>                                         r);
> >>               } else
> >> -                    drm_sched_job_done(s_job);
> >> +                    drm_sched_job_done(s_job, 0);
> >>       }
> >>
> >>       if (full_recovery) {
> >> @@ -1010,15 +1010,13 @@ static int drm_sched_main(void *param)
> >>                       r = dma_fence_add_callback(fence, &sched_job->cb,
> >>                                                 drm_sched_job_done_cb);
> >>                       if (r == -ENOENT)
> >> - drm_sched_job_done(sched_job);
> >> + drm_sched_job_done(sched_job, fence->error);
> >>                       else if (r)
> >> DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
> >>                                         r);
> >>               } else {
> >> -                    if (IS_ERR(fence))
> >> - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
> >> -
> >> - drm_sched_job_done(sched_job);
> >> + drm_sched_job_done(sched_job, IS_ERR(fence) ?
> >> + PTR_ERR(fence) : 0);
> >>               }
> >>
> >> wake_up(&sched->job_scheduled);
> >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> >> index ca857ec9e7eb..5c1df6b12ced 100644
> >> --- a/include/drm/gpu_scheduler.h
> >> +++ b/include/drm/gpu_scheduler.h
> >> @@ -569,7 +569,7 @@ void drm_sched_fence_init(struct 
> drm_sched_fence *fence,
> >>   void drm_sched_fence_free(struct drm_sched_fence *fence);
> >>
> >>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
> >> -void drm_sched_fence_finished(struct drm_sched_fence *fence);
> >> +void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
> >>
> >>   unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler 
> *sched);
> >>   void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20230424/c2319e87/attachment-0001.htm>


More information about the amd-gfx mailing list