[PATCH v2] drm/scheduler: remove timeout work_struct from drm_sched_job

Christian König ckoenig.leichtzumerken at gmail.com
Wed Sep 26 08:13:20 UTC 2018


On 26.09.2018 at 09:39, Lucas Stach wrote:
> Hi Nayan,
>
> On Wednesday, 26.09.2018, at 02:09 +0900, Nayan Deshmukh wrote:
>> having a delayed work item per job is redundant, as we only need one
>> per scheduler to track the timeout of the currently executing job.
>>
>> v2: the first element of the ring mirror list is the currently
>> executing job, so we don't need an additional variable for it.
>>
>> Signed-off-by: Nayan Deshmukh <nayan26deshmukh at gmail.com>
>> Suggested-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++---------------
>>   include/drm/gpu_scheduler.h            |  6 +++---
>>   2 files changed, 19 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 9ca741f3a0bc..4e8505d51795 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)
>>   	 * manages to find this job as the next job in the list, the fence
>>   	 * signaled check below will prevent the timeout to be restarted.
>>   	 */
>> -	cancel_delayed_work_sync(&s_job->work_tdr);
>> +	cancel_delayed_work_sync(&sched->work_tdr);
>>   
>>   	spin_lock(&sched->job_list_lock);
>> -	/* queue TDR for next job */
>> -	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>> -	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
>> -		struct drm_sched_job *next = list_next_entry(s_job, node);
>> -
>> -		if (!dma_fence_is_signaled(&next->s_fence->finished))
>> -			schedule_delayed_work(&next->work_tdr, sched->timeout);
>> -	}
>>   	/* remove job from ring_mirror_list */
>>   	list_del(&s_job->node);
>> +	/* queue TDR for next job */
>> +	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>> +	    !list_empty(&sched->ring_mirror_list))
>> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>>   	spin_unlock(&sched->job_list_lock);
>>   
>>   	dma_fence_put(&s_job->s_fence->finished);
>> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>>   	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>>   	    list_first_entry_or_null(&sched->ring_mirror_list,
>>   				     struct drm_sched_job, node) == s_job)
>> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
>> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>>   	spin_unlock(&sched->job_list_lock);
>>   }
>>   
>>   static void drm_sched_job_timedout(struct work_struct *work)
>>   {
>> -	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
>> -						 work_tdr.work);
>> +	struct drm_gpu_scheduler *sched;
>> +	struct drm_sched_job *job;
>> +
>> +	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
>> +	job = list_first_entry_or_null(&sched->ring_mirror_list,
>> +				       struct drm_sched_job, node);
>>   
>> -	job->sched->ops->timedout_job(job);
>> +	if (job)
>> +		job->sched->ops->timedout_job(job);
> I don't think this is fully robust. Jobs are only removed from the
> ring_mirror_list once the job_finish worker has run. If execution of
> this worker is delayed for any reason (though a delay as long as the
> job timeout is really unlikely), you end up blaming the wrong job
> here.
>
> So I think what you need to do is walk the ring mirror list and pick
> the first job with an unsignaled finish fence; that robustly
> identifies the stuck job.
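>
> A minimal sketch of that lookup (untested, just to illustrate the
> idea; drm_sched_find_stuck_job() is a made-up name, and it assumes
> the per-scheduler work_tdr from this v2):
>
> static struct drm_sched_job *
> drm_sched_find_stuck_job(struct drm_gpu_scheduler *sched)
> {
> 	struct drm_sched_job *job;
>
> 	/* Caller holds sched->job_list_lock. */
> 	list_for_each_entry(job, &sched->ring_mirror_list, node) {
> 		/* The first unsignaled finish fence marks the stuck job. */
> 		if (!dma_fence_is_signaled(&job->s_fence->finished))
> 			return job;
> 	}
>
> 	/* Everything signaled already, the timeout was a false alarm. */
> 	return NULL;
> }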

Yeah, that is a known problem I've pointed out as well.

The issue is that we have bug reports of this happening even before
the patch, but I'm not 100% sure how.

My suggestion is to move a good part of the logic from 
drm_sched_hw_job_reset() and drm_sched_job_recovery() into 
drm_sched_job_timedout().

E.g. we first call dma_fence_remove_callback() for each job and
actually check the return value to see if the fence was already
signaled.

If we find a signaled fence, we abort and add the callbacks back to
the jobs we removed them from, roughly like the sketch below.
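
Only a sketch of the idea, not a tested patch -- it reuses
drm_sched_process_job() and s_fence->cb from the existing reset code,
and drm_sched_try_to_cancel_all() is a made-up name:

static bool drm_sched_try_to_cancel_all(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job;

	/* Caller holds sched->job_list_lock. */
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		/* Returns false if the fence signaled already. */
		if (s_fence->parent &&
		    !dma_fence_remove_callback(s_fence->parent,
					       &s_fence->cb))
			goto rollback;
	}
	return true;

rollback:
	/*
	 * A fence signaled under us, so this is no real hang: re-add
	 * the callbacks we removed so far and abort the timeout
	 * handling.
	 */
	list_for_each_entry_continue_reverse(s_job,
					     &sched->ring_mirror_list,
					     node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (s_fence->parent &&
		    dma_fence_add_callback(s_fence->parent, &s_fence->cb,
					   drm_sched_process_job))
			drm_sched_process_job(s_fence->parent,
					      &s_fence->cb);
	}
	return false;
}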

Nayan, do you want to take care of this, or should I take a look?

Regards,
Christian.

>
> Regards,
> Lucas
>
>>   }
>>   
>>   /**
>> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>   	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
>>   					 struct drm_sched_job, node);
>>   	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
>> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
>> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>>   
>>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>>   		struct drm_sched_fence *s_fence = s_job->s_fence;
>> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>   
>>   	INIT_WORK(&job->finish_work, drm_sched_job_finish);
>>   	INIT_LIST_HEAD(&job->node);
>> -	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
>>   
>>   	return 0;
>>   }
>> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>   	INIT_LIST_HEAD(&sched->ring_mirror_list);
>>   	spin_lock_init(&sched->job_list_lock);
>>   	atomic_set(&sched->hw_rq_count, 0);
>> +	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>>   	atomic_set(&sched->num_jobs, 0);
>>   	atomic64_set(&sched->job_id_count, 0);
>>   
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index daec50f887b3..d87b268f1781 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>>    *               finished to remove the job from the
>>    *               @drm_gpu_scheduler.ring_mirror_list.
>>    * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
>> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
>> - *            interval is over.
>>    * @id: a unique id assigned to each job scheduled on the scheduler.
>>    * @karma: increment on every hang caused by this job. If this exceeds the hang
>>    *         limit of the scheduler then the job is marked guilty and will not
>> @@ -195,7 +193,6 @@ struct drm_sched_job {
>>   	struct dma_fence_cb		finish_cb;
>>   	struct work_struct		finish_work;
>>   	struct list_head		node;
>> -	struct delayed_work		work_tdr;
>>   	uint64_t			id;
>>   	atomic_t			karma;
>>   	enum drm_sched_priority		s_priority;
>> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
>>    *                 finished.
>>    * @hw_rq_count: the number of jobs currently in the hardware queue.
>>    * @job_id_count: used to assign unique id to the each job.
>> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
>> + *            timeout interval is over.
>>    * @thread: the kthread on which the scheduler which run.
>>    * @ring_mirror_list: the list of jobs which are currently in the job queue.
>>    * @job_list_lock: lock to protect the ring_mirror_list.
>> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
>>   	wait_queue_head_t		job_scheduled;
>>   	atomic_t			hw_rq_count;
>>   	atomic64_t			job_id_count;
>> +	struct delayed_work		work_tdr;
>>   	struct task_struct		*thread;
>>   	struct list_head		ring_mirror_list;
>>   	spinlock_t			job_list_lock;