[PATCH 2/2] drm/scheduler: Add drm_sched_suspend/resume_timeout()
Koenig, Christian
Christian.Koenig at amd.com
Thu Nov 29 16:07:55 UTC 2018
Am 29.11.18 um 11:05 schrieb Sharat Masetty:
> This patch adds two new functions to help client drivers suspend and
> resume the scheduler job timeout. This can be useful in cases where the
> hardware has preemption support enabled. Using this, it is possible to have
> the timeout active only for the ring which is active on the ringbuffer.
> This patch also makes the job_list_lock IRQ safe.
>
> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
> Signed-off-by: Sharat Masetty <smasetty at codeaurora.org>
Maybe make changing the job_list_lock to irqsave a separate patch, but
either way looks good to me.
Patch is Reviewed-by: Christian König <christian.koenig at amd.com>
I'm going to pick them up tomorrow for upstreaming if nobody objects.
Christian.
> ---
> drivers/gpu/drm/etnaviv/etnaviv_dump.c | 9 ++--
> drivers/gpu/drm/scheduler/sched_main.c | 91 ++++++++++++++++++++++++++++------
> include/drm/gpu_scheduler.h | 4 ++
> 3 files changed, 86 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> index 9146e30..fd6bad2 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> @@ -118,6 +118,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
> unsigned int n_obj, n_bomap_pages;
> size_t file_size, mmu_size;
> __le64 *bomap, *bomap_start;
> + unsigned long flags;
>
> /* Only catch the first event, or when manually re-armed */
> if (!etnaviv_dump_core)
> @@ -134,13 +135,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
> mmu_size + gpu->buffer.size;
>
> /* Add in the active command buffers */
> - spin_lock(&gpu->sched.job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
> submit = to_etnaviv_submit(s_job);
> file_size += submit->cmdbuf.size;
> n_obj++;
> }
> - spin_unlock(&gpu->sched.job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> /* Add in the active buffer objects */
> list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
> @@ -182,14 +183,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
> gpu->buffer.size,
> etnaviv_cmdbuf_get_va(&gpu->buffer));
>
> - spin_lock(&gpu->sched.job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
> submit = to_etnaviv_submit(s_job);
> etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
> submit->cmdbuf.vaddr, submit->cmdbuf.size,
> etnaviv_cmdbuf_get_va(&submit->cmdbuf));
> }
> - spin_unlock(&gpu->sched.job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> /* Reserve space for the bomap */
> if (n_bomap_pages) {
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index c993d10..ca09b4e 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -196,10 +196,68 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
> schedule_delayed_work(&sched->work_tdr, sched->timeout);
> }
>
> +/**
> + * drm_sched_suspend_timeout - Suspend scheduler job timeout
> + *
> + * @sched: scheduler instance for which to suspend the timeout
> + *
> + * Suspend the delayed work timeout for the scheduler. This is done by
> + * modifying the delayed work timeout to an arbitrary large value,
> + * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
> + * called from an IRQ context.
> + *
> + * Returns the timeout remaining
> + *
> + */
> +unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
> +{
> + unsigned long sched_timeout, now = jiffies;
> +
> + sched_timeout = sched->work_tdr.timer.expires;
> +
> + /*
> + * Modify the timeout to an arbitrarily large value. This also prevents
> + * the timeout to be restarted when new submissions arrive
> + */
> + if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
> + && time_after(sched_timeout, now))
> + return sched_timeout - now;
> + else
> + return sched->timeout;
> +}
> +EXPORT_SYMBOL(drm_sched_suspend_timeout);
> +
> +/**
> + * drm_sched_resume_timeout - Resume scheduler job timeout
> + *
> + * @sched: scheduler instance for which to resume the timeout
> + * @remaining: remaining timeout
> + *
> + * Resume the delayed work timeout for the scheduler. Note that
> + * this function can be called from an IRQ context.
> + */
> +void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
> + unsigned long remaining)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> +
> + if (list_empty(&sched->ring_mirror_list))
> + cancel_delayed_work(&sched->work_tdr);
> + else
> + mod_delayed_work(system_wq, &sched->work_tdr, remaining);
> +
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> +}
> +EXPORT_SYMBOL(drm_sched_resume_timeout);
> +
> /* job_finish is called after hw fence signaled
> */
> static void drm_sched_job_finish(struct work_struct *work)
> {
> + unsigned long flags;
> +
> struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
> finish_work);
> struct drm_gpu_scheduler *sched = s_job->sched;
> @@ -213,12 +271,12 @@ static void drm_sched_job_finish(struct work_struct *work)
> */
> cancel_delayed_work_sync(&sched->work_tdr);
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> /* remove job from ring_mirror_list */
> list_del(&s_job->node);
> /* queue TDR for next job */
> drm_sched_start_timeout(sched);
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> dma_fence_put(&s_job->s_fence->finished);
> sched->ops->free_job(s_job);
> @@ -234,15 +292,17 @@ static void drm_sched_job_finish_cb(struct dma_fence *f,
>
> static void drm_sched_job_begin(struct drm_sched_job *s_job)
> {
> + unsigned long flags;
> +
> struct drm_gpu_scheduler *sched = s_job->sched;
>
> dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
> drm_sched_job_finish_cb);
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_add_tail(&s_job->node, &sched->ring_mirror_list);
> drm_sched_start_timeout(sched);
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> }
>
> static void drm_sched_job_timedout(struct work_struct *work)
> @@ -250,10 +310,11 @@ static void drm_sched_job_timedout(struct work_struct *work)
> struct drm_gpu_scheduler *sched;
> struct drm_sched_job *job;
> int r;
> + unsigned long flags;
>
> sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry_reverse(job, &sched->ring_mirror_list, node) {
> struct drm_sched_fence *fence = job->s_fence;
>
> @@ -263,12 +324,12 @@ static void drm_sched_job_timedout(struct work_struct *work)
>
> job = list_first_entry_or_null(&sched->ring_mirror_list,
> struct drm_sched_job, node);
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> if (job)
> sched->ops->timedout_job(job);
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry(job, &sched->ring_mirror_list, node) {
> struct drm_sched_fence *fence = job->s_fence;
>
> @@ -283,7 +344,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
> already_signaled:
> ;
> }
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> }
>
> /**
> @@ -298,8 +359,9 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
> struct drm_sched_job *s_job;
> struct drm_sched_entity *entity, *tmp;
> int i;
> + unsigned long flags;
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
> if (s_job->s_fence->parent &&
> dma_fence_remove_callback(s_job->s_fence->parent,
> @@ -309,7 +371,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
> atomic_dec(&sched->hw_rq_count);
> }
> }
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
> if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
> atomic_inc(&bad->karma);
> @@ -348,8 +410,9 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
> struct drm_sched_job *s_job, *tmp;
> bool found_guilty = false;
> int r;
> + unsigned long flags;
>
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
> struct drm_sched_fence *s_fence = s_job->s_fence;
> struct dma_fence *fence;
> @@ -363,7 +426,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
> if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
> dma_fence_set_error(&s_fence->finished, -ECANCELED);
>
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> fence = sched->ops->run_job(s_job);
> atomic_inc(&sched->hw_rq_count);
>
> @@ -380,10 +443,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
> } else {
> drm_sched_process_job(NULL, &s_fence->cb);
> }
> - spin_lock(&sched->job_list_lock);
> + spin_lock_irqsave(&sched->job_list_lock, flags);
> }
> drm_sched_start_timeout(sched);
> - spin_unlock(&sched->job_list_lock);
> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
> }
> EXPORT_SYMBOL(drm_sched_job_recovery);
>
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index d87b268..c9657a8 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -326,4 +326,8 @@ struct drm_sched_fence *drm_sched_fence_create(
> void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
> void drm_sched_fence_finished(struct drm_sched_fence *fence);
>
> +unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
> +void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
> + unsigned long remaining);
> +
> #endif
More information about the dri-devel
mailing list