[PATCH v4 1/2] drm/sched: Refactor ring mirror list handling.
Grodzovsky, Andrey
Andrey.Grodzovsky at amd.com
Wed Dec 19 17:32:40 UTC 2018
On 12/19/2018 11:21 AM, Christian König wrote:
> Am 17.12.18 um 20:51 schrieb Andrey Grodzovsky:
>> Decauple sched threads stop and start and ring mirror
>> list handling from the policy of what to do about the
>> guilty jobs.
>> When stoppping the sched thread and detaching sched fences
>> from non signaled HW fenes wait for all signaled HW fences
>> to complete before rerunning the jobs.
>>
>> v2: Fix resubmission of guilty job into HW after refactoring.
>>
>> v4:
>> Full restart for all the jobs, not only from guilty ring.
>> Extract karma increase into standalone function.
>>
>> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +--
>> drivers/gpu/drm/etnaviv/etnaviv_sched.c | 8 +-
>> drivers/gpu/drm/scheduler/sched_main.c | 164
>> +++++++++++++++++------------
>> drivers/gpu/drm/v3d/v3d_sched.c | 9 +-
>> include/drm/gpu_scheduler.h | 9 +-
>> 5 files changed, 118 insertions(+), 89 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 8a078f4..8ac4f43 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3298,12 +3298,7 @@ static int amdgpu_device_pre_asic_reset(struct
>> amdgpu_device *adev,
>> if (!ring || !ring->sched.thread)
>> continue;
>> - kthread_park(ring->sched.thread);
>> -
>> - if (job && job->base.sched != &ring->sched)
>> - continue;
>> -
>> - drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
>> + drm_sched_stop(&ring->sched, job ? &job->base : NULL);
>> /* after all hw jobs are reset, hw fence is meaningless,
>> so force_completion */
>> amdgpu_fence_driver_force_completion(ring);
>> @@ -3454,14 +3449,10 @@ static void
>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>> if (!ring || !ring->sched.thread)
>> continue;
>> - /* only need recovery sched of the given job's ring
>> - * or all rings (in the case @job is NULL)
>> - * after above amdgpu_reset accomplished
>> - */
>> - if ((!job || job->base.sched == &ring->sched) &&
>> !adev->asic_reset_res)
>> - drm_sched_job_recovery(&ring->sched);
>> + if (!adev->asic_reset_res)
>> + drm_sched_resubmit_jobs(&ring->sched);
>> - kthread_unpark(ring->sched.thread);
>> + drm_sched_start(&ring->sched, !adev->asic_reset_res);
>> }
>> if (!amdgpu_device_has_dc_support(adev)) {
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> index 49a6763..d7075cd 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> @@ -109,16 +109,16 @@ static void etnaviv_sched_timedout_job(struct
>> drm_sched_job *sched_job)
>> }
>> /* block scheduler */
>> - kthread_park(gpu->sched.thread);
>> - drm_sched_hw_job_reset(&gpu->sched, sched_job);
>> + drm_sched_stop(&gpu->sched, sched_job);
>> /* get the GPU back into the init state */
>> etnaviv_core_dump(gpu);
>> etnaviv_gpu_recover_hang(gpu);
>> + drm_sched_resubmit_jobs(&gpu->sched);
>> +
>> /* restart scheduler after GPU is usable again */
>> - drm_sched_job_recovery(&gpu->sched);
>> - kthread_unpark(gpu->sched.thread);
>> + drm_sched_start(&gpu->sched, true);
>> }
>> static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index dbb6906..1cf9541 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -60,8 +60,6 @@
>> static void drm_sched_process_job(struct dma_fence *f, struct
>> dma_fence_cb *cb);
>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job
>> *s_job);
>> -
>> /**
>> * drm_sched_rq_init - initialize a given run queue struct
>> *
>> @@ -335,6 +333,41 @@ static void drm_sched_job_timedout(struct
>> work_struct *work)
>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> }
>> +static void drm_sched_increase_karma(struct drm_sched_job *bad)
>> +{
>> + int i;
>> + struct drm_sched_entity *tmp;
>> + struct drm_sched_entity *entity;
>> + struct drm_gpu_scheduler *sched = bad->sched;
>> +
>> + /* don't increase @bad's karma if it's from KERNEL RQ,
>> + * because sometimes GPU hang would cause kernel jobs (like VM
>> updating jobs)
>> + * corrupt but keep in mind that kernel jobs always considered
>> good.
>> + */
>> + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>> + atomic_inc(&bad->karma);
>> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
>> + i++) {
>> + struct drm_sched_rq *rq = &sched->sched_rq[i];
>> +
>> + spin_lock(&rq->lock);
>> + list_for_each_entry_safe(entity, tmp, &rq->entities,
>> list) {
>> + if (bad->s_fence->scheduled.context ==
>> + entity->fence_context) {
>> + if (atomic_read(&bad->karma) >
>> + bad->sched->hang_limit)
>> + if (entity->guilty)
>> + atomic_set(entity->guilty, 1);
>> + break;
>> + }
>> + }
>> + spin_unlock(&rq->lock);
>> + if (&entity->list != &rq->entities)
>> + break;
>> + }
>> + }
>> +}
>> +
>> /**
>> * drm_sched_hw_job_reset - stop the scheduler if it contains the
>> bad job
>> *
>> @@ -342,12 +375,15 @@ static void drm_sched_job_timedout(struct
>> work_struct *work)
>> * @bad: bad scheduler job
>> *
>> */
>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct
>> drm_sched_job *bad)
>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct
>> drm_sched_job *bad)
>> {
>> struct drm_sched_job *s_job;
>> - struct drm_sched_entity *entity, *tmp;
>> unsigned long flags;
>> - int i;
>> + struct list_head wait_list;
>> +
>> + kthread_park(sched->thread);
>> +
>> + INIT_LIST_HEAD(&wait_list);
>> spin_lock_irqsave(&sched->job_list_lock, flags);
>> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list,
>> node) {
>> @@ -357,35 +393,28 @@ void drm_sched_hw_job_reset(struct
>> drm_gpu_scheduler *sched, struct drm_sched_jo
>> dma_fence_put(s_job->s_fence->parent);
>> s_job->s_fence->parent = NULL;
>> atomic_dec(&sched->hw_rq_count);
>> + } else {
>> + /* TODO Is it get/put neccessey here ? */
>> + dma_fence_get(&s_job->s_fence->finished);
>> + list_add(&s_job->finish_node, &wait_list);
>> }
>> }
>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> - if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>> - atomic_inc(&bad->karma);
>> - /* don't increase @bad's karma if it's from KERNEL RQ,
>> - * becuase sometimes GPU hang would cause kernel jobs (like
>> VM updating jobs)
>> - * corrupt but keep in mind that kernel jobs always
>> considered good.
>> - */
>> - for (i = DRM_SCHED_PRIORITY_MIN; i <
>> DRM_SCHED_PRIORITY_KERNEL; i++ ) {
>> - struct drm_sched_rq *rq = &sched->sched_rq[i];
>> -
>> - spin_lock(&rq->lock);
>> - list_for_each_entry_safe(entity, tmp, &rq->entities,
>> list) {
>> - if (bad->s_fence->scheduled.context ==
>> entity->fence_context) {
>> - if (atomic_read(&bad->karma) >
>> bad->sched->hang_limit)
>> - if (entity->guilty)
>> - atomic_set(entity->guilty, 1);
>> - break;
>> - }
>> - }
>> - spin_unlock(&rq->lock);
>> - if (&entity->list != &rq->entities)
>> - break;
>> - }
>> + /*
>> + * Verify all the signaled jobs in mirror list are removed from
>> the ring
>> + * We rely on the fact that any finish_work in progress will
>> wait for this
>> + * handler to complete before releasing all of the jobs we iterate.
>> + */
>> + list_for_each_entry(s_job, &wait_list, finish_node) {
>> + dma_fence_wait(&s_job->s_fence->finished, false);
>> + dma_fence_put(&s_job->s_fence->finished);
>> }
>
> Mhm, thinking more about it that approach won't work.
>
> Only the scheduler instance with the timeout will block on the GPU
> reset to finish before freeing up s_job.
>
> The job_list_lock will protect us, but as soon as that is dropped we
> are potentially working with freed up jobs here.
>
> Maybe your idea of just waiting for the last finished fence to signal
> wasn't so bad after all. This way you only need to grab the last one
> and wait for it.
I was concerned with relying on the assumption saying that if last job
(it's fence actually) in ring_mirror_list is signaled then necessary
all the previous jobs/fences in the list have already signaled - does
the order of submission into HW ring (same as order of insertion into
ring_mirror_list ) implies necessary same order of job/fence completion
? I know we talked about it already but I still not sure, is this
ALWAYS true ?
What about instead of using the jobs as elements for the wait_list we
can just locally here dynamically allocate some
struct fence_cont {
struct list_head node;
struct dma_fence *finished;
}
and then we can iterate that list and wait for the fence to signal
without worrying for the jobs being already released ?
Andrey
>
> And since fences are reference counted we also don't run into any
> problems because of that.
>
>> +
>> + if (bad)
>> + drm_sched_increase_karma(bad);
>
> Better export drm_sched_increase_karma() and let the drivers call that
> implicitly.
>
> Regards,
> Christian.
>
>> }
>> -EXPORT_SYMBOL(drm_sched_hw_job_reset);
>> +EXPORT_SYMBOL(drm_sched_stop);
>> /**
>> * drm_sched_job_recovery - recover jobs after a reset
>> @@ -393,33 +422,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset);
>> * @sched: scheduler instance
>> *
>> */
>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool
>> full_recovery)
>> {
>> struct drm_sched_job *s_job, *tmp;
>> - bool found_guilty = false;
>> unsigned long flags;
>> int r;
>> + if (!full_recovery)
>> + goto unpark;
>> +
>> spin_lock_irqsave(&sched->job_list_lock, flags);
>> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>> node) {
>> struct drm_sched_fence *s_fence = s_job->s_fence;
>> - struct dma_fence *fence;
>> - uint64_t guilty_context;
>> -
>> - if (!found_guilty && atomic_read(&s_job->karma) >
>> sched->hang_limit) {
>> - found_guilty = true;
>> - guilty_context = s_job->s_fence->scheduled.context;
>> - }
>> -
>> - if (found_guilty && s_job->s_fence->scheduled.context ==
>> guilty_context)
>> - dma_fence_set_error(&s_fence->finished, -ECANCELED);
>> -
>> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> - fence = sched->ops->run_job(s_job);
>> - atomic_inc(&sched->hw_rq_count);
>> + struct dma_fence *fence = s_job->s_fence->parent;
>> if (fence) {
>> - s_fence->parent = dma_fence_get(fence);
>> r = dma_fence_add_callback(fence, &s_fence->cb,
>> drm_sched_process_job);
>> if (r == -ENOENT)
>> @@ -427,18 +444,47 @@ void drm_sched_job_recovery(struct
>> drm_gpu_scheduler *sched)
>> else if (r)
>> DRM_ERROR("fence add callback failed (%d)\n",
>> r);
>> - dma_fence_put(fence);
>> - } else {
>> - if (s_fence->finished.error < 0)
>> - drm_sched_expel_job_unlocked(s_job);
>> + } else
>> drm_sched_process_job(NULL, &s_fence->cb);
>> - }
>> - spin_lock_irqsave(&sched->job_list_lock, flags);
>> }
>> +
>> drm_sched_start_timeout(sched);
>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> +
>> +unpark:
>> + kthread_unpark(sched->thread);
>> }
>> -EXPORT_SYMBOL(drm_sched_job_recovery);
>> +EXPORT_SYMBOL(drm_sched_start);
>> +
>> +/**
>> + * drm_sched_resubmit_jobs - helper to relunch job from mirror ring
>> list
>> + *
>> + * @sched: scheduler instance
>> + *
>> + */
>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
>> +{
>> + struct drm_sched_job *s_job, *tmp;
>> + uint64_t guilty_context;
>> + bool found_guilty = false;
>> +
>> + /*TODO DO we need spinlock here ? */
>> + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>> node) {
>> + struct drm_sched_fence *s_fence = s_job->s_fence;
>> +
>> + if (!found_guilty && atomic_read(&s_job->karma) >
>> sched->hang_limit) {
>> + found_guilty = true;
>> + guilty_context = s_job->s_fence->scheduled.context;
>> + }
>> +
>> + if (found_guilty && s_job->s_fence->scheduled.context ==
>> guilty_context)
>> + dma_fence_set_error(&s_fence->finished, -ECANCELED);
>> +
>> + s_job->s_fence->parent = sched->ops->run_job(s_job);
>> + atomic_inc(&sched->hw_rq_count);
>> + }
>> +}
>> +EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>> /**
>> * drm_sched_job_init - init a scheduler job
>> @@ -634,26 +680,14 @@ static int drm_sched_main(void *param)
>> DRM_ERROR("fence add callback failed (%d)\n",
>> r);
>> dma_fence_put(fence);
>> - } else {
>> - if (s_fence->finished.error < 0)
>> - drm_sched_expel_job_unlocked(sched_job);
>> + } else
>> drm_sched_process_job(NULL, &s_fence->cb);
>> - }
>> wake_up(&sched->job_scheduled);
>> }
>> return 0;
>> }
>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job)
>> -{
>> - struct drm_gpu_scheduler *sched = s_job->sched;
>> -
>> - spin_lock(&sched->job_list_lock);
>> - list_del_init(&s_job->node);
>> - spin_unlock(&sched->job_list_lock);
>> -}
>> -
>> /**
>> * drm_sched_init - Init a gpu scheduler instance
>> *
>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
>> b/drivers/gpu/drm/v3d/v3d_sched.c
>> index 445b2ef..aaa10b0 100644
>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>> @@ -178,18 +178,19 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
>> for (q = 0; q < V3D_MAX_QUEUES; q++) {
>> struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
>> - kthread_park(sched->thread);
>> - drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
>> + drm_sched_stop(sched, (sched_job->sched == sched ?
>> sched_job : NULL));
>> }
>> /* get the GPU back into the init state */
>> v3d_reset(v3d);
>> + for (q = 0; q < V3D_MAX_QUEUES; q++)
>> + drm_sched_resubmit_jobs(sched_job->sched);
>> +
>> /* Unblock schedulers and restart their jobs. */
>> for (q = 0; q < V3D_MAX_QUEUES; q++) {
>> - drm_sched_job_recovery(&v3d->queue[q].sched);
>> - kthread_unpark(v3d->queue[q].sched.thread);
>> + drm_sched_start(&v3d->queue[q].sched, true);
>> }
>> mutex_unlock(&v3d->reset_lock);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 47e1979..441384c 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -175,6 +175,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct
>> dma_fence *f);
>> * finished to remove the job from the
>> * @drm_gpu_scheduler.ring_mirror_list.
>> * @node: used to append this struct to the
>> @drm_gpu_scheduler.ring_mirror_list.
>> + * @finish_node: used in a list to wait on before resetting the
>> scheduler
>> * @id: a unique id assigned to each job scheduled on the scheduler.
>> * @karma: increment on every hang caused by this job. If this
>> exceeds the hang
>> * limit of the scheduler then the job is marked guilty and
>> will not
>> @@ -193,6 +194,7 @@ struct drm_sched_job {
>> struct dma_fence_cb finish_cb;
>> struct work_struct finish_work;
>> struct list_head node;
>> + struct list_head finish_node;
>> uint64_t id;
>> atomic_t karma;
>> enum drm_sched_priority s_priority;
>> @@ -298,9 +300,10 @@ int drm_sched_job_init(struct drm_sched_job *job,
>> void *owner);
>> void drm_sched_job_cleanup(struct drm_sched_job *job);
>> void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
>> - struct drm_sched_job *job);
>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
>> +void drm_sched_stop(struct drm_gpu_scheduler *sched,
>> + struct drm_sched_job *job);
>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool
>> full_recovery);
>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
>> bool drm_sched_dependency_optimized(struct dma_fence* fence,
>> struct drm_sched_entity *entity);
>> void drm_sched_fault(struct drm_gpu_scheduler *sched);
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the etnaviv
mailing list