[PATCH 2/2] drm/sched: Rework HW fence processing.
Grodzovsky, Andrey
Andrey.Grodzovsky at amd.com
Thu Dec 6 18:12:08 UTC 2018
On 12/06/2018 12:41 PM, Andrey Grodzovsky wrote:
> Expedite job deletion from ring mirror list to the HW fence signal
> callback instead from finish_work, together with waiting for all
> such fences to signal in drm_sched_stop we garantee that
> already signaled job will not be processed twice.
> Remove the sched finish fence callback and just submit finish_work
> directly from the HW fence callback.
>
> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
> drivers/gpu/drm/scheduler/sched_fence.c | 4 +++-
> drivers/gpu/drm/scheduler/sched_main.c | 39 ++++++++++++++++-----------------
> include/drm/gpu_scheduler.h | 10 +++++++--
> 3 files changed, 30 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index d8d2dff..e62c239 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> EXPORT_SYMBOL(to_drm_sched_fence);
>
> struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
> - void *owner)
> + void *owner,
> + struct drm_sched_job *s_job)
> {
> struct drm_sched_fence *fence = NULL;
> unsigned seq;
> @@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
> fence->owner = owner;
> fence->sched = entity->rq->sched;
> spin_lock_init(&fence->lock);
> + fence->s_job = s_job;
>
> seq = atomic_inc_return(&entity->fence_seq);
> dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 8fb7f86..2860037 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct work_struct *work)
> cancel_delayed_work_sync(&sched->work_tdr);
>
> spin_lock_irqsave(&sched->job_list_lock, flags);
> - /* remove job from ring_mirror_list */
> - list_del_init(&s_job->node);
> - /* queue TDR for next job */
> drm_sched_start_timeout(sched);
> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> sched->ops->free_job(s_job);
> }
>
> -static void drm_sched_job_finish_cb(struct dma_fence *f,
> - struct dma_fence_cb *cb)
> -{
> - struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
> - finish_cb);
> - schedule_work(&job->finish_work);
> -}
> -
> static void drm_sched_job_begin(struct drm_sched_job *s_job)
> {
> struct drm_gpu_scheduler *sched = s_job->sched;
> unsigned long flags;
>
> - dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
> - drm_sched_job_finish_cb);
> -
> spin_lock_irqsave(&sched->job_list_lock, flags);
> list_add_tail(&s_job->node, &sched->ring_mirror_list);
> drm_sched_start_timeout(sched);
> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
> {
> struct drm_sched_job *s_job, *tmp;
> bool found_guilty = false;
> - unsigned long flags;
> int r;
>
> if (unpark_only)
> goto unpark;
>
> - spin_lock_irqsave(&sched->job_list_lock, flags);
> + /*
> + * Locking the list is not required here as the sched thread is parked
> + * so no new jobs are being pushed in to HW and in drm_sched_stop we
> + * flushed any in flight jobs who didn't signal yet.
The comment is inaccurate here - it's supposed to be ' any in flight
jobs who already have their
sched finished signaled and they are removed from the mirror ring list
at that point already anyway'
I will fix this text later with other comments received on the patches.
Andrey
> Also concurrent
> + * GPU recovers can't run in parallel.
> + */
> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
> struct drm_sched_fence *s_fence = s_job->s_fence;
> struct dma_fence *fence;
> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
> }
>
> drm_sched_start_timeout(sched);
> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> unpark:
> kthread_unpark(sched->thread);
> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
> job->sched = sched;
> job->entity = entity;
> job->s_priority = entity->rq - sched->sched_rq;
> - job->s_fence = drm_sched_fence_create(entity, owner);
> + job->s_fence = drm_sched_fence_create(entity, owner, job);
> if (!job->s_fence)
> return -ENOMEM;
> job->id = atomic64_inc_return(&sched->job_id_count);
> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
> struct drm_sched_fence *s_fence =
> container_of(cb, struct drm_sched_fence, cb);
> struct drm_gpu_scheduler *sched = s_fence->sched;
> + struct drm_sched_job *s_job = s_fence->s_job;
> + unsigned long flags;
> +
> + cancel_delayed_work(&sched->work_tdr);
>
> - dma_fence_get(&s_fence->finished);
> atomic_dec(&sched->hw_rq_count);
> atomic_dec(&sched->num_jobs);
> +
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> + /* remove job from ring_mirror_list */
> + list_del_init(&s_job->node);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> +
> drm_sched_fence_finished(s_fence);
>
> trace_drm_sched_process_job(s_fence);
> - dma_fence_put(&s_fence->finished);
> wake_up_interruptible(&sched->wake_up_worker);
> +
> + schedule_work(&s_job->finish_work);
> }
>
> /**
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index c94b592..23855c6 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -115,6 +115,8 @@ struct drm_sched_rq {
> struct drm_sched_entity *current_entity;
> };
>
> +struct drm_sched_job;
> +
> /**
> * struct drm_sched_fence - fences corresponding to the scheduling of a job.
> */
> @@ -160,6 +162,9 @@ struct drm_sched_fence {
> * @owner: job owner for debugging
> */
> void *owner;
> +
> + /* Back pointer to owning job */
> + struct drm_sched_job *s_job;
> };
>
> struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
> @@ -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> enum drm_sched_priority priority);
> bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>
> -struct drm_sched_fence *drm_sched_fence_create(
> - struct drm_sched_entity *s_entity, void *owner);
> +struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity,
> + void *owner,
> + struct drm_sched_job *s_job);
> void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
> void drm_sched_fence_finished(struct drm_sched_fence *fence);
>
More information about the amd-gfx
mailing list