[PATCH v5 1/2] drm/sched: Refactor ring mirror list handling.

Thu Jan 10 15:56:42 UTC 2019

Just a ping.

Andrey

On 01/09/2019 10:18 AM, Andrey Grodzovsky wrote:
>
>
> On 01/09/2019 05:22 AM, Christian König wrote:
>> Am 07.01.19 um 20:47 schrieb Grodzovsky, Andrey:
>>>
>>> On 01/07/2019 09:13 AM, Christian König wrote:
>>>> Am 03.01.19 um 18:42 schrieb Grodzovsky, Andrey:
>>>>> On 01/03/2019 11:20 AM, Grodzovsky, Andrey wrote:
>>>>>> On 01/03/2019 03:54 AM, Koenig, Christian wrote:
>>>>>>> Am 21.12.18 um 21:36 schrieb Grodzovsky, Andrey:
>>>>>>>> On 12/21/2018 01:37 PM, Christian König wrote:
>>>>>>>>> Am 20.12.18 um 20:23 schrieb Andrey Grodzovsky:
>>>>>>>>>> Decauple sched threads stop and start and ring mirror
>>>>>>>>>> list handling from the policy of what to do about the
>>>>>>>>>> guilty jobs.
>>>>>>>>>> When stoppping the sched thread and detaching sched fences
>>>>>>>>>> from non signaled HW fenes wait for all signaled HW fences
>>>>>>>>>> to complete before rerunning the jobs.
>>>>>>>>>>
>>>>>>>>>> v2: Fix resubmission of guilty job into HW after refactoring.
>>>>>>>>>>
>>>>>>>>>> v4:
>>>>>>>>>> Full restart for all the jobs, not only from guilty ring.
>>>>>>>>>> Extract karma increase into standalone function.
>>>>>>>>>>
>>>>>>>>>> v5:
>>>>>>>>>> Rework waiting for signaled jobs without relying on the job
>>>>>>>>>> struct itself as those might already be freed for non 'guilty'
>>>>>>>>>> job's schedulers.
>>>>>>>>>> Expose karma increase to drivers.
>>>>>>>>>>
>>>>>>>>>> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
>>>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>>>>>>>>> ---
>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  18 +--
>>>>>>>>>>        drivers/gpu/drm/etnaviv/etnaviv_sched.c |  11 +-
>>>>>>>>>>        drivers/gpu/drm/scheduler/sched_main.c | 188
>>>>>>>>>> +++++++++++++++++++----------
>>>>>>>>>>        drivers/gpu/drm/v3d/v3d_sched.c |  12 +-
>>>>>>>>>>        include/drm/gpu_scheduler.h |  10 +-
>>>>>>>>>>        5 files changed, 151 insertions(+), 88 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>>>> index 8a078f4..a4bd2d3 100644
>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>>>> @@ -3298,12 +3298,10 @@ static int
>>>>>>>>>> amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>>>>>>>>>                if (!ring || !ring->sched.thread)
>>>>>>>>>>                    continue;
>>>>>>>>>>        - kthread_park(ring->sched.thread);
>>>>>>>>>> +        drm_sched_stop(&ring->sched, job ? &job->base : NULL);
>>>>>>>>>>        -        if (job && job->base.sched != &ring->sched)
>>>>>>>>>> -            continue;
>>>>>>>>>> -
>>>>>>>>>> - drm_sched_hw_job_reset(&ring->sched, job ? &job->base :
>>>>>>>>>> NULL);
>>>>>>>>>> +        if(job)
>>>>>>>>>> + drm_sched_increase_karma(&job->base);
>>>>>>>>> Since we dropped the "job && job->base.sched != &ring->sched" 
>>>>>>>>> check
>>>>>>>>> above this will now increase the jobs karma multiple times.
>>>>>>>>>
>>>>>>>>> Maybe just move that outside of the loop.
>>>>>>>>>
>>>>>>>>>>                  /* after all hw jobs are reset, hw fence is
>>>>>>>>>> meaningless,
>>>>>>>>>> so force_completion */
>>>>>>>>>> amdgpu_fence_driver_force_completion(ring);
>>>>>>>>>> @@ -3454,14 +3452,10 @@ static void
>>>>>>>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>>>>>>>>>>                if (!ring || !ring->sched.thread)
>>>>>>>>>>                    continue;
>>>>>>>>>>        -        /* only need recovery sched of the given 
>>>>>>>>>> job's ring
>>>>>>>>>> -         * or all rings (in the case @job is NULL)
>>>>>>>>>> -         * after above amdgpu_reset accomplished
>>>>>>>>>> -         */
>>>>>>>>>> -        if ((!job || job->base.sched == &ring->sched) &&
>>>>>>>>>> !adev->asic_reset_res)
>>>>>>>>>> - drm_sched_job_recovery(&ring->sched);
>>>>>>>>>> +        if (!adev->asic_reset_res)
>>>>>>>>>> + drm_sched_resubmit_jobs(&ring->sched);
>>>>>>>>>>        - kthread_unpark(ring->sched.thread);
>>>>>>>>>> +        drm_sched_start(&ring->sched, !adev->asic_reset_res);
>>>>>>>>>>            }
>>>>>>>>>>              if (!amdgpu_device_has_dc_support(adev)) {
>>>>>>>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>>>>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>>>>> index 49a6763..6f1268f 100644
>>>>>>>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>>>>> @@ -109,16 +109,19 @@ static void 
>>>>>>>>>> etnaviv_sched_timedout_job(struct
>>>>>>>>>> drm_sched_job *sched_job)
>>>>>>>>>>            }
>>>>>>>>>>              /* block scheduler */
>>>>>>>>>> -    kthread_park(gpu->sched.thread);
>>>>>>>>>> -    drm_sched_hw_job_reset(&gpu->sched, sched_job);
>>>>>>>>>> +    drm_sched_stop(&gpu->sched, sched_job);
>>>>>>>>>> +
>>>>>>>>>> +    if(sched_job)
>>>>>>>>>> +        drm_sched_increase_karma(sched_job);
>>>>>>>>>>              /* get the GPU back into the init state */
>>>>>>>>>>            etnaviv_core_dump(gpu);
>>>>>>>>>>            etnaviv_gpu_recover_hang(gpu);
>>>>>>>>>>        + drm_sched_resubmit_jobs(&gpu->sched);
>>>>>>>>>> +
>>>>>>>>>>            /* restart scheduler after GPU is usable again */
>>>>>>>>>> -    drm_sched_job_recovery(&gpu->sched);
>>>>>>>>>> -    kthread_unpark(gpu->sched.thread);
>>>>>>>>>> +    drm_sched_start(&gpu->sched, true);
>>>>>>>>>>        }
>>>>>>>>>>          static void etnaviv_sched_free_job(struct drm_sched_job
>>>>>>>>>> *sched_job)
>>>>>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>>>> index dbb6906..b5c5bee 100644
>>>>>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>>>>> @@ -60,8 +60,6 @@
>>>>>>>>>>          static void drm_sched_process_job(struct dma_fence *f,
>>>>>>>>>> struct
>>>>>>>>>> dma_fence_cb *cb);
>>>>>>>>>>        -static void drm_sched_expel_job_unlocked(struct
>>>>>>>>>> drm_sched_job
>>>>>>>>>> *s_job);
>>>>>>>>>> -
>>>>>>>>>>        /**
>>>>>>>>>>         * drm_sched_rq_init - initialize a given run queue 
>>>>>>>>>> struct
>>>>>>>>>>         *
>>>>>>>>>> @@ -335,6 +333,42 @@ static void drm_sched_job_timedout(struct
>>>>>>>>>> work_struct *work)
>>>>>>>>>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>>        }
>>>>>>>>> Kernel doc here would be nice to have.
>>>>>>>>>
>>>>>>>>>> +void drm_sched_increase_karma(struct drm_sched_job *bad)
>>>>>>>>>> +{
>>>>>>>>>> +    int i;
>>>>>>>>>> +    struct drm_sched_entity *tmp;
>>>>>>>>>> +    struct drm_sched_entity *entity;
>>>>>>>>>> +    struct drm_gpu_scheduler *sched = bad->sched;
>>>>>>>>>> +
>>>>>>>>>> +    /* don't increase @bad's karma if it's from KERNEL RQ,
>>>>>>>>>> +     * because sometimes GPU hang would cause kernel jobs 
>>>>>>>>>> (like VM
>>>>>>>>>> updating jobs)
>>>>>>>>>> +     * corrupt but keep in mind that kernel jobs always 
>>>>>>>>>> considered
>>>>>>>>>> good.
>>>>>>>>>> +     */
>>>>>>>>>> +    if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>>>>>>>>>> +        atomic_inc(&bad->karma);
>>>>>>>>>> +        for (i = DRM_SCHED_PRIORITY_MIN; i <
>>>>>>>>>> DRM_SCHED_PRIORITY_KERNEL;
>>>>>>>>>> +             i++) {
>>>>>>>>>> +            struct drm_sched_rq *rq = &sched->sched_rq[i];
>>>>>>>>>> +
>>>>>>>>>> +            spin_lock(&rq->lock);
>>>>>>>>>> +            list_for_each_entry_safe(entity, tmp, 
>>>>>>>>>> &rq->entities,
>>>>>>>>>> list) {
>>>>>>>>>> +                if (bad->s_fence->scheduled.context ==
>>>>>>>>>> +                    entity->fence_context) {
>>>>>>>>>> +                    if (atomic_read(&bad->karma) >
>>>>>>>>>> + bad->sched->hang_limit)
>>>>>>>>>> +                        if (entity->guilty)
>>>>>>>>>> + atomic_set(entity->guilty, 1);
>>>>>>>>>> +                    break;
>>>>>>>>>> +                }
>>>>>>>>>> +            }
>>>>>>>>>> +            spin_unlock(&rq->lock);
>>>>>>>>>> +            if (&entity->list != &rq->entities)
>>>>>>>>>> +                break;
>>>>>>>>>> +        }
>>>>>>>>>> +    }
>>>>>>>>>> +}
>>>>>>>>>> +EXPORT_SYMBOL(drm_sched_increase_karma);
>>>>>>>>>> +
>>>>>>>>>>        /**
>>>>>>>>>>         * drm_sched_hw_job_reset - stop the scheduler if it
>>>>>>>>>> contains the
>>>>>>>>>> bad job
>>>>>>>>>>         *
>>>>>>>>>> @@ -342,13 +376,22 @@ static void drm_sched_job_timedout(struct
>>>>>>>>>> work_struct *work)
>>>>>>>>>>         * @bad: bad scheduler job
>>>>>>>>>>         *
>>>>>>>>>>         */
>>>>>>>>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
>>>>>>>>>> struct
>>>>>>>>>> drm_sched_job *bad)
>>>>>>>>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct
>>>>>>>>>> drm_sched_job *bad)
>>>>>>>>>>        {
>>>>>>>>>> -    struct drm_sched_job *s_job;
>>>>>>>>>> -    struct drm_sched_entity *entity, *tmp;
>>>>>>>>>> +    struct drm_sched_job *s_job, *last_job;
>>>>>>>>>>            unsigned long flags;
>>>>>>>>>> -    int i;
>>>>>>>>>> +    struct dma_fence *wait_fence =  NULL;
>>>>>>>>>> +    int r;
>>>>>>>>>> +
>>>>>>>>>> +    kthread_park(sched->thread);
>>>>>>>>>>        +    /*
>>>>>>>>>> +     * Verify all the signaled jobs in mirror list are removed
>>>>>>>>>> from
>>>>>>>>>> the ring
>>>>>>>>>> +     * by waiting for their respective scheduler fences to 
>>>>>>>>>> signal.
>>>>>>>>>> +     * Continually  repeat traversing the ring mirror list
>>>>>>>>>> until no
>>>>>>>>>> more signaled
>>>>>>>>>> +     * fences are found
>>>>>>>>>> +     */
>>>>>>>>>> +retry_wait:
>>>>>>>>>> spin_lock_irqsave(&sched->job_list_lock, flags);
>>>>>>>>>>            list_for_each_entry_reverse(s_job,
>>>>>>>>>> &sched->ring_mirror_list,
>>>>>>>>>> node) {
>>>>>>>>>>                if (s_job->s_fence->parent &&
>>>>>>>>>> @@ -357,35 +400,45 @@ void drm_sched_hw_job_reset(struct
>>>>>>>>>> drm_gpu_scheduler *sched, struct drm_sched_jo
>>>>>>>>>> dma_fence_put(s_job->s_fence->parent);
>>>>>>>>>>                    s_job->s_fence->parent = NULL;
>>>>>>>>>> atomic_dec(&sched->hw_rq_count);
>>>>>>>>>> +        } else {
>>>>>>>>>> +             wait_fence =
>>>>>>>>>> dma_fence_get(&s_job->s_fence->finished);
>>>>>>>>>> +             last_job = s_job;
>>>>>>>>>> +             break;
>>>>>>>>>>                }
>>>>>>>>>>            }
>>>>>>>>>> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>>        -    if (bad && bad->s_priority !=
>>>>>>>>>> DRM_SCHED_PRIORITY_KERNEL) {
>>>>>>>>>> -        atomic_inc(&bad->karma);
>>>>>>>>>> -        /* don't increase @bad's karma if it's from KERNEL RQ,
>>>>>>>>>> -         * becuase sometimes GPU hang would cause kernel jobs
>>>>>>>>>> (like
>>>>>>>>>> VM updating jobs)
>>>>>>>>>> -         * corrupt but keep in mind that kernel jobs always
>>>>>>>>>> considered good.
>>>>>>>>>> -         */
>>>>>>>>>> -        for (i = DRM_SCHED_PRIORITY_MIN; i <
>>>>>>>>>> DRM_SCHED_PRIORITY_KERNEL; i++ ) {
>>>>>>>>>> -            struct drm_sched_rq *rq = &sched->sched_rq[i];
>>>>>>>>>> +    /* No signaled jobs in the ring, its safe to proceed to 
>>>>>>>>>> ASIC
>>>>>>>>>> reset */
>>>>>>>>>> +    if (!wait_fence) {
>>>>>>>>>> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>> +        goto done;
>>>>>>>>>> +    }
>>>>>>>>>>        -            spin_lock(&rq->lock);
>>>>>>>>>> -            list_for_each_entry_safe(entity, tmp, 
>>>>>>>>>> &rq->entities,
>>>>>>>>>> list) {
>>>>>>>>>> -                if (bad->s_fence->scheduled.context ==
>>>>>>>>>> entity->fence_context) {
>>>>>>>>>> -                    if (atomic_read(&bad->karma) >
>>>>>>>>>> bad->sched->hang_limit)
>>>>>>>>>> -                        if (entity->guilty)
>>>>>>>>>> - atomic_set(entity->guilty, 1);
>>>>>>>>>> -                    break;
>>>>>>>>>> -                }
>>>>>>>>>> -            }
>>>>>>>>>> -            spin_unlock(&rq->lock);
>>>>>>>>>> -            if (&entity->list != &rq->entities)
>>>>>>>>>> -                break;
>>>>>>>>>> +    /* Restore removed cb since removing again already removed
>>>>>>>>>> cb is
>>>>>>>>>> undefined */
>>>>>>>>>> +    list_for_each_entry_reverse(s_job, 
>>>>>>>>>> &sched->ring_mirror_list,
>>>>>>>>>> node) {
>>>>>>>>>> +        if(s_job == last_job)
>>>>>>>>>> +            break;
>>>>>>>>> Need to double check after the holidays, but you should be able
>>>>>>>>> to use
>>>>>>>>> list_for_each_entry_continue here.
>>>>>>>> I think it should work - kind of traversing back on all the jobs
>>>>>>>> we just
>>>>>>>> removed their callbacks.
>>>>>>> Wrapping your head around stuff again after the holidays sometimes
>>>>>>> shows
>>>>>>> new problems we have completely missed :)
>>>>>>>
>>>>>>> Adding the callbacks again won't work because we have freed up our
>>>>>>> reference to the hardware fence above:
>>>>>>>> dma_fence_put(s_job->s_fence->parent);
>>>>>>>>                    s_job->s_fence->parent = NULL;
>>>>>>> We need to drop this or we would never be able to re-add the fences
>>>>>> Yea, that a big miss on my side...
>>>>>>
>>>>>>> .
>>>>>>>
>>>>>>> But I'm still not sure if we shouldn't rely on the correct order of
>>>>>>> signaling and simplify this instead.
>>>>>> As you said before, once we switched to signaling the parent from 
>>>>>> the
>>>>>> interrupt context instead of scheduled work no danger of race there
>>>>> Correction here - once we switched removing the job from mirror_ring
>>>>> list directly in interrupt context instead later from scheduled work
>>>> Ok, so let's stick with the approach of only waiting for the first
>>>> signaled one found.
>>>>
>>>> But we need to remove setting the parent fence to NULL or otherwise we
>>>> won't be able to add the callback ever again.
>>>>
>>>> Christian.
>>> But we will not be adding the cb back in drm_sched_stop anymore, now we
>>> are only going to add back the cb in drm_sched_startr after rerunning
>>> those jobs in drm_sched_resubmit_jobs and assign them a new parent 
>>> there
>>> anyway.
>>
>> Yeah, but when we find that we don't need to reset anything anymore 
>> then adding the callbacks again won't be possible any more.
>>
>> Christian.
>
> I am not sure I understand it, can u point me to example of how this 
> will happen ? I am attaching my latest patches with waiting only for 
> the last job's fence here just so we are on same page regarding the code.
>
> Andrey
>
>>
>>>
>>> Andrey
>>>
>>>>> Andrey
>>>>>
>>>>>> , so
>>>>>> what if we submit job A and after it job B and B completes before A
>>>>>> (like the sync dependency test in libdrm amdgpu tests but without
>>>>>> adding
>>>>>> explicit dependency to the second command on the first) I believe 
>>>>>> that
>>>>>> still in this case job B's parent (HW) fence will not be signaled
>>>>>> before
>>>>>> job A completes since EOP event is not signaled until the entire 
>>>>>> pipe
>>>>>> completed and flushed it's cashes including job A. So from this
>>>>>> seems to
>>>>>> me that indeed it's enough to wait for the last inserted job's 
>>>>>> parent
>>>>>> (HW) fence in ring mirror list to signal.
>>>>>> Let me know what you think on that.
>>>>>>
>>>>>> P.S V5 is not the last iteration and there was V6 series.
>>>>>>
>>>>>> Andrey
>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> Andrey
>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> +        if (s_job->s_fence->parent) {
>>>>>>>>>> +            r = dma_fence_add_callback(s_job->s_fence->parent,
>>>>>>>>>> + &s_job->s_fence->cb,
>>>>>>>>>> + drm_sched_process_job);
>>>>>>>>>> +            if (r)
>>>>>>>>>> +                DRM_ERROR("fence restore callback failed 
>>>>>>>>>> (%d)\n",
>>>>>>>>>> +                                      r);
>>>>>>>>> When you fail to add the callback this means that you need to 
>>>>>>>>> call
>>>>>>>>> call drm_sched_process_job manually here.
>>>>>>>>>
>>>>>>>>>>                }
>>>>>>>>>>            }
>>>>>>>>>> + spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>> +
>>>>>>>>>> +    dma_fence_wait(wait_fence, false);
>>>>>>>>>> +    dma_fence_put(wait_fence);
>>>>>>>>>> +    wait_fence = NULL;
>>>>>>>>>> +
>>>>>>>>>> +    goto retry_wait;
>>>>>>>>>> +
>>>>>>>>>> +done:
>>>>>>>>>> +    return;
>>>>>>>>> Drop the done: label and return directly above.
>>>>>>>>>
>>>>>>>>> Apart from all those nit picks that starts to look like it should
>>>>>>>>> work,
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>>>        }
>>>>>>>>>> -EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>>>>>>>>> +EXPORT_SYMBOL(drm_sched_stop);
>>>>>>>>>>          /**
>>>>>>>>>>         * drm_sched_job_recovery - recover jobs after a reset
>>>>>>>>>> @@ -393,33 +446,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>>>>>>>>>         * @sched: scheduler instance
>>>>>>>>>>         *
>>>>>>>>>>         */
>>>>>>>>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>>>>>>>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool
>>>>>>>>>> full_recovery)
>>>>>>>>>>        {
>>>>>>>>>>            struct drm_sched_job *s_job, *tmp;
>>>>>>>>>> -    bool found_guilty = false;
>>>>>>>>>>            unsigned long flags;
>>>>>>>>>>            int r;
>>>>>>>>>>        +    if (!full_recovery)
>>>>>>>>>> +        goto unpark;
>>>>>>>>>> +
>>>>>>>>>> spin_lock_irqsave(&sched->job_list_lock, flags);
>>>>>>>>>>            list_for_each_entry_safe(s_job, tmp,
>>>>>>>>>> &sched->ring_mirror_list,
>>>>>>>>>> node) {
>>>>>>>>>>                struct drm_sched_fence *s_fence = s_job->s_fence;
>>>>>>>>>> -        struct dma_fence *fence;
>>>>>>>>>> -        uint64_t guilty_context;
>>>>>>>>>> -
>>>>>>>>>> -        if (!found_guilty && atomic_read(&s_job->karma) >
>>>>>>>>>> sched->hang_limit) {
>>>>>>>>>> -            found_guilty = true;
>>>>>>>>>> -            guilty_context = s_job->s_fence->scheduled.context;
>>>>>>>>>> -        }
>>>>>>>>>> -
>>>>>>>>>> -        if (found_guilty && 
>>>>>>>>>> s_job->s_fence->scheduled.context ==
>>>>>>>>>> guilty_context)
>>>>>>>>>> - dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>>>>>>>>> -
>>>>>>>>>> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>> -        fence = sched->ops->run_job(s_job);
>>>>>>>>>> -        atomic_inc(&sched->hw_rq_count);
>>>>>>>>>> +        struct dma_fence *fence = s_job->s_fence->parent;
>>>>>>>>>>                  if (fence) {
>>>>>>>>>> -            s_fence->parent = dma_fence_get(fence);
>>>>>>>>>>                    r = dma_fence_add_callback(fence, 
>>>>>>>>>> &s_fence->cb,
>>>>>>>>>> drm_sched_process_job);
>>>>>>>>>>                    if (r == -ENOENT)
>>>>>>>>>> @@ -427,18 +468,47 @@ void drm_sched_job_recovery(struct
>>>>>>>>>> drm_gpu_scheduler *sched)
>>>>>>>>>>                    else if (r)
>>>>>>>>>>                        DRM_ERROR("fence add callback failed 
>>>>>>>>>> (%d)\n",
>>>>>>>>>>                              r);
>>>>>>>>>> -            dma_fence_put(fence);
>>>>>>>>>> -        } else {
>>>>>>>>>> -            if (s_fence->finished.error < 0)
>>>>>>>>>> - drm_sched_expel_job_unlocked(s_job);
>>>>>>>>>> +        } else
>>>>>>>>>>                    drm_sched_process_job(NULL, &s_fence->cb);
>>>>>>>>>> -        }
>>>>>>>>>> - spin_lock_irqsave(&sched->job_list_lock, flags);
>>>>>>>>>>            }
>>>>>>>>>> +
>>>>>>>>>>            drm_sched_start_timeout(sched);
>>>>>>>>>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>>>>>>> +
>>>>>>>>>> +unpark:
>>>>>>>>>> +    kthread_unpark(sched->thread);
>>>>>>>>>> +}
>>>>>>>>>> +EXPORT_SYMBOL(drm_sched_start);
>>>>>>>>>> +
>>>>>>>>>> +/**
>>>>>>>>>> + * drm_sched_resubmit_jobs - helper to relunch job from mirror
>>>>>>>>>> ring
>>>>>>>>>> list
>>>>>>>>>> + *
>>>>>>>>>> + * @sched: scheduler instance
>>>>>>>>>> + *
>>>>>>>>>> + */
>>>>>>>>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
>>>>>>>>>> +{
>>>>>>>>>> +    struct drm_sched_job *s_job, *tmp;
>>>>>>>>>> +    uint64_t guilty_context;
>>>>>>>>>> +    bool found_guilty = false;
>>>>>>>>>> +
>>>>>>>>>> +    /*TODO DO we need spinlock here ? */
>>>>>>>>>> +    list_for_each_entry_safe(s_job, tmp, 
>>>>>>>>>> &sched->ring_mirror_list,
>>>>>>>>>> node) {
>>>>>>>>>> +        struct drm_sched_fence *s_fence = s_job->s_fence;
>>>>>>>>>> +
>>>>>>>>>> +        if (!found_guilty && atomic_read(&s_job->karma) >
>>>>>>>>>> sched->hang_limit) {
>>>>>>>>>> +            found_guilty = true;
>>>>>>>>>> +            guilty_context = s_job->s_fence->scheduled.context;
>>>>>>>>>> +        }
>>>>>>>>>> +
>>>>>>>>>> +        if (found_guilty && 
>>>>>>>>>> s_job->s_fence->scheduled.context ==
>>>>>>>>>> guilty_context)
>>>>>>>>>> + dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>>>>>>>>> +
>>>>>>>>>> +        s_job->s_fence->parent = sched->ops->run_job(s_job);
>>>>>>>>>> +        atomic_inc(&sched->hw_rq_count);
>>>>>>>>>> +    }
>>>>>>>>>>        }
>>>>>>>>>> -EXPORT_SYMBOL(drm_sched_job_recovery);
>>>>>>>>>> +EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>>>>>>>>>>          /**
>>>>>>>>>>         * drm_sched_job_init - init a scheduler job
>>>>>>>>>> @@ -634,26 +704,14 @@ static int drm_sched_main(void *param)
>>>>>>>>>>                        DRM_ERROR("fence add callback failed 
>>>>>>>>>> (%d)\n",
>>>>>>>>>>                              r);
>>>>>>>>>>                    dma_fence_put(fence);
>>>>>>>>>> -        } else {
>>>>>>>>>> -            if (s_fence->finished.error < 0)
>>>>>>>>>> - drm_sched_expel_job_unlocked(sched_job);
>>>>>>>>>> +        } else
>>>>>>>>>>                    drm_sched_process_job(NULL, &s_fence->cb);
>>>>>>>>>> -        }
>>>>>>>>>> wake_up(&sched->job_scheduled);
>>>>>>>>>>            }
>>>>>>>>>>            return 0;
>>>>>>>>>>        }
>>>>>>>>>>        -static void drm_sched_expel_job_unlocked(struct
>>>>>>>>>> drm_sched_job *s_job)
>>>>>>>>>> -{
>>>>>>>>>> -    struct drm_gpu_scheduler *sched = s_job->sched;
>>>>>>>>>> -
>>>>>>>>>> -    spin_lock(&sched->job_list_lock);
>>>>>>>>>> -    list_del_init(&s_job->node);
>>>>>>>>>> -    spin_unlock(&sched->job_list_lock);
>>>>>>>>>> -}
>>>>>>>>>> -
>>>>>>>>>>        /**
>>>>>>>>>>         * drm_sched_init - Init a gpu scheduler instance
>>>>>>>>>>         *
>>>>>>>>>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
>>>>>>>>>> b/drivers/gpu/drm/v3d/v3d_sched.c
>>>>>>>>>> index 445b2ef..f76d9ed 100644
>>>>>>>>>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>>>>>>>>>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>>>>>>>>>> @@ -178,18 +178,22 @@ v3d_job_timedout(struct drm_sched_job
>>>>>>>>>> *sched_job)
>>>>>>>>>>            for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>>>>>>>>>                struct drm_gpu_scheduler *sched =
>>>>>>>>>> &v3d->queue[q].sched;
>>>>>>>>>>        -        kthread_park(sched->thread);
>>>>>>>>>> -        drm_sched_hw_job_reset(sched, (sched_job->sched == 
>>>>>>>>>> sched ?
>>>>>>>>>> +        drm_sched_stop(sched, (sched_job->sched == sched ?
>>>>>>>>>>                                   sched_job : NULL));
>>>>>>>>>> +
>>>>>>>>>> +        if(sched_job)
>>>>>>>>>> + drm_sched_increase_karma(sched_job);
>>>>>>>>>>            }
>>>>>>>>>>              /* get the GPU back into the init state */
>>>>>>>>>>            v3d_reset(v3d);
>>>>>>>>>>        +    for (q = 0; q < V3D_MAX_QUEUES; q++)
>>>>>>>>>> + drm_sched_resubmit_jobs(sched_job->sched);
>>>>>>>>>> +
>>>>>>>>>>            /* Unblock schedulers and restart their jobs. */
>>>>>>>>>>            for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>>>>>>>>> - drm_sched_job_recovery(&v3d->queue[q].sched);
>>>>>>>>>> - kthread_unpark(v3d->queue[q].sched.thread);
>>>>>>>>>> + drm_sched_start(&v3d->queue[q].sched, true);
>>>>>>>>>>            }
>>>>>>>>>> mutex_unlock(&v3d->reset_lock);
>>>>>>>>>> diff --git a/include/drm/gpu_scheduler.h
>>>>>>>>>> b/include/drm/gpu_scheduler.h
>>>>>>>>>> index 47e1979..5ab2d97 100644
>>>>>>>>>> --- a/include/drm/gpu_scheduler.h
>>>>>>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>>>>>>> @@ -175,6 +175,7 @@ struct drm_sched_fence
>>>>>>>>>> *to_drm_sched_fence(struct
>>>>>>>>>> dma_fence *f);
>>>>>>>>>>         *               finished to remove the job from the
>>>>>>>>>>         * @drm_gpu_scheduler.ring_mirror_list.
>>>>>>>>>>         * @node: used to append this struct to the
>>>>>>>>>> @drm_gpu_scheduler.ring_mirror_list.
>>>>>>>>>> + * @finish_node: used in a list to wait on before resetting the
>>>>>>>>>> scheduler
>>>>>>>>>>         * @id: a unique id assigned to each job scheduled on the
>>>>>>>>>> scheduler.
>>>>>>>>>>         * @karma: increment on every hang caused by this job. If
>>>>>>>>>> this
>>>>>>>>>> exceeds the hang
>>>>>>>>>>         *         limit of the scheduler then the job is marked
>>>>>>>>>> guilty and
>>>>>>>>>> will not
>>>>>>>>>> @@ -193,6 +194,7 @@ struct drm_sched_job {
>>>>>>>>>>            struct dma_fence_cb        finish_cb;
>>>>>>>>>>            struct work_struct finish_work;
>>>>>>>>>>            struct list_head        node;
>>>>>>>>>> +    struct list_head        finish_node;
>>>>>>>>>>            uint64_t            id;
>>>>>>>>>>            atomic_t            karma;
>>>>>>>>>>            enum drm_sched_priority s_priority;
>>>>>>>>>> @@ -298,9 +300,11 @@ int drm_sched_job_init(struct drm_sched_job
>>>>>>>>>> *job,
>>>>>>>>>>                       void *owner);
>>>>>>>>>>        void drm_sched_job_cleanup(struct drm_sched_job *job);
>>>>>>>>>>        void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>>>>>>>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
>>>>>>>>>> -                struct drm_sched_job *job);
>>>>>>>>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
>>>>>>>>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched,
>>>>>>>>>> +            struct drm_sched_job *job);
>>>>>>>>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool
>>>>>>>>>> full_recovery);
>>>>>>>>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
>>>>>>>>>> +void drm_sched_increase_karma(struct drm_sched_job *bad);
>>>>>>>>>>        bool drm_sched_dependency_optimized(struct dma_fence* 
>>>>>>>>>> fence,
>>>>>>>>>>                            struct drm_sched_entity *entity);
>>>>>>>>>>        void drm_sched_fault(struct drm_gpu_scheduler *sched);
>>>>>>>>> _______________________________________________
>>>>>>>>> amd-gfx mailing list
>>>>>>>>> amd-gfx at lists.freedesktop.org
>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>> _______________________________________________
>>>>>>> amd-gfx mailing list
>>>>>>> amd-gfx at lists.freedesktop.org
>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel at lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel at lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>