[PATCH v3 2/2] drm/sched: Rework HW fence processing.
Grodzovsky, Andrey
Andrey.Grodzovsky at amd.com
Tue Dec 11 16:01:51 UTC 2018
A I understand you say that by the time the fence callback runs the job
might have already been released,
but how so if the job gets released from drm_sched_job_finish work
handler in the normal flow - so, after the HW
fence (s_fence->parent) cb is executed. Other 2 flows are error use
cases where amdgpu_job_free is called directly in which
cases I assume the job wasn't submitted to HW. Last flow I see is
drm_sched_entity_kill_jobs_cb and here I actually see a problem
with the code as it's today - drm_sched_fence_finished is called which
will trigger s_fence->finished callback to run which today
schedules drm_sched_job_finish which releases the job, but we don't even
wait for that and call free_job cb directly from
after that which seems wrong to me.
Andrey
On 12/10/2018 09:45 PM, Zhou, David(ChunMing) wrote:
> I don't think adding cb to sched job would work as soon as their lifetime is different with fence.
> Unless you make the sched job reference, otherwise we will get trouble sooner or later.
>
> -David
>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
>> Andrey Grodzovsky
>> Sent: Tuesday, December 11, 2018 5:44 AM
>> To: dri-devel at lists.freedesktop.org; amd-gfx at lists.freedesktop.org;
>> ckoenig.leichtzumerken at gmail.com; eric at anholt.net;
>> etnaviv at lists.freedesktop.org
>> Cc: Zhou, David(ChunMing) <David1.Zhou at amd.com>; Liu, Monk
>> <Monk.Liu at amd.com>; Grodzovsky, Andrey
>> <Andrey.Grodzovsky at amd.com>
>> Subject: [PATCH v3 2/2] drm/sched: Rework HW fence processing.
>>
>> Expedite job deletion from ring mirror list to the HW fence signal callback
>> instead from finish_work, together with waiting for all such fences to signal in
>> drm_sched_stop we garantee that already signaled job will not be processed
>> twice.
>> Remove the sched finish fence callback and just submit finish_work directly
>> from the HW fence callback.
>>
>> v2: Fix comments.
>>
>> v3: Attach hw fence cb to sched_job
>>
>> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>> drivers/gpu/drm/scheduler/sched_main.c | 58 ++++++++++++++++----------
>> --------
>> include/drm/gpu_scheduler.h | 6 ++--
>> 2 files changed, 30 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index cdf95e2..f0c1f32 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct
>> *work)
>> cancel_delayed_work_sync(&sched->work_tdr);
>>
>> spin_lock_irqsave(&sched->job_list_lock, flags);
>> - /* remove job from ring_mirror_list */
>> - list_del_init(&s_job->node);
>> /* queue TDR for next job */
>> drm_sched_start_timeout(sched);
>> spin_unlock_irqrestore(&sched->job_list_lock, flags); @@ -293,22
>> +291,11 @@ static void drm_sched_job_finish(struct work_struct *work)
>> sched->ops->free_job(s_job);
>> }
>>
>> -static void drm_sched_job_finish_cb(struct dma_fence *f,
>> - struct dma_fence_cb *cb)
>> -{
>> - struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>> - finish_cb);
>> - schedule_work(&job->finish_work);
>> -}
>> -
>> static void drm_sched_job_begin(struct drm_sched_job *s_job) {
>> struct drm_gpu_scheduler *sched = s_job->sched;
>> unsigned long flags;
>>
>> - dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
>>> finish_cb,
>> - drm_sched_job_finish_cb);
>> -
>> spin_lock_irqsave(&sched->job_list_lock, flags);
>> list_add_tail(&s_job->node, &sched->ring_mirror_list);
>> drm_sched_start_timeout(sched);
>> @@ -359,12 +346,11 @@ void drm_sched_stop(struct drm_gpu_scheduler
>> *sched, struct drm_sched_job *bad,
>> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node)
>> {
>> if (s_job->s_fence->parent &&
>> dma_fence_remove_callback(s_job->s_fence->parent,
>> - &s_job->s_fence->cb)) {
>> + &s_job->cb)) {
>> dma_fence_put(s_job->s_fence->parent);
>> s_job->s_fence->parent = NULL;
>> atomic_dec(&sched->hw_rq_count);
>> - }
>> - else {
>> + } else {
>> /* TODO Is it get/put neccessey here ? */
>> dma_fence_get(&s_job->s_fence->finished);
>> list_add(&s_job->finish_node, &wait_list); @@ -
>> 417,31 +403,34 @@ EXPORT_SYMBOL(drm_sched_stop); void
>> drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only) {
>> struct drm_sched_job *s_job, *tmp;
>> - unsigned long flags;
>> int r;
>>
>> if (unpark_only)
>> goto unpark;
>>
>> - spin_lock_irqsave(&sched->job_list_lock, flags);
>> + /*
>> + * Locking the list is not required here as the sched thread is parked
>> + * so no new jobs are being pushed in to HW and in drm_sched_stop
>> we
>> + * flushed all the jobs who were still in mirror list but who already
>> + * signaled and removed them self from the list. Also concurrent
>> + * GPU recovers can't run in parallel.
>> + */
>> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>> node) {
>> - struct drm_sched_fence *s_fence = s_job->s_fence;
>> struct dma_fence *fence = s_job->s_fence->parent;
>>
>> if (fence) {
>> - r = dma_fence_add_callback(fence, &s_fence->cb,
>> + r = dma_fence_add_callback(fence, &s_job->cb,
>> drm_sched_process_job);
>> if (r == -ENOENT)
>> - drm_sched_process_job(fence, &s_fence-
>>> cb);
>> + drm_sched_process_job(fence, &s_job->cb);
>> else if (r)
>> DRM_ERROR("fence add callback failed
>> (%d)\n",
>> r);
>> } else
>> - drm_sched_process_job(NULL, &s_fence->cb);
>> + drm_sched_process_job(NULL, &s_job->cb);
>> }
>>
>> drm_sched_start_timeout(sched);
>> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>
>> unpark:
>> kthread_unpark(sched->thread);
>> @@ -590,18 +579,27 @@ drm_sched_select_entity(struct
>> drm_gpu_scheduler *sched)
>> */
>> static void drm_sched_process_job(struct dma_fence *f, struct
>> dma_fence_cb *cb) {
>> - struct drm_sched_fence *s_fence =
>> - container_of(cb, struct drm_sched_fence, cb);
>> + struct drm_sched_job *s_job = container_of(cb, struct
>> drm_sched_job, cb);
>> + struct drm_sched_fence *s_fence = s_job->s_fence;
>> struct drm_gpu_scheduler *sched = s_fence->sched;
>> + unsigned long flags;
>> +
>> + cancel_delayed_work(&sched->work_tdr);
>>
>> - dma_fence_get(&s_fence->finished);
>> atomic_dec(&sched->hw_rq_count);
>> atomic_dec(&sched->num_jobs);
>> +
>> + spin_lock_irqsave(&sched->job_list_lock, flags);
>> + /* remove job from ring_mirror_list */
>> + list_del_init(&s_job->node);
>> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> +
>> drm_sched_fence_finished(s_fence);
>>
>> trace_drm_sched_process_job(s_fence);
>> - dma_fence_put(&s_fence->finished);
>> wake_up_interruptible(&sched->wake_up_worker);
>> +
>> + schedule_work(&s_job->finish_work);
>> }
>>
>> /**
>> @@ -664,16 +662,16 @@ static int drm_sched_main(void *param)
>>
>> if (fence) {
>> s_fence->parent = dma_fence_get(fence);
>> - r = dma_fence_add_callback(fence, &s_fence->cb,
>> + r = dma_fence_add_callback(fence, &sched_job->cb,
>> drm_sched_process_job);
>> if (r == -ENOENT)
>> - drm_sched_process_job(fence, &s_fence-
>>> cb);
>> + drm_sched_process_job(fence, &sched_job-
>>> cb);
>> else if (r)
>> DRM_ERROR("fence add callback failed
>> (%d)\n",
>> r);
>> dma_fence_put(fence);
>> } else
>> - drm_sched_process_job(NULL, &s_fence->cb);
>> + drm_sched_process_job(NULL, &sched_job->cb);
>>
>> wake_up(&sched->job_scheduled);
>> }
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index c94b592..f29aa1c 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -138,10 +138,6 @@ struct drm_sched_fence {
>> struct dma_fence finished;
>>
>> /**
>> - * @cb: the callback for the parent fence below.
>> - */
>> - struct dma_fence_cb cb;
>> - /**
>> * @parent: the fence returned by &drm_sched_backend_ops.run_job
>> * when scheduling the job on hardware. We signal the
>> * &drm_sched_fence.finished fence once parent is signalled.
>> @@ -182,6 +178,7 @@ struct drm_sched_fence
>> *to_drm_sched_fence(struct dma_fence *f);
>> * be scheduled further.
>> * @s_priority: the priority of the job.
>> * @entity: the entity to which this job belongs.
>> + * @cb: the callback for the parent fence in s_fence.
>> *
>> * A job is created by the driver using drm_sched_job_init(), and
>> * should call drm_sched_entity_push_job() once it wants the scheduler
>> @@ -199,6 +196,7 @@ struct drm_sched_job {
>> atomic_t karma;
>> enum drm_sched_priority s_priority;
>> struct drm_sched_entity *entity;
>> + struct dma_fence_cb cb;
>> };
>>
>> static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list