[PATCH v3 1/2] drm/sched: Refactor ring mirror list handling.
Koenig, Christian
Christian.Koenig at amd.com
Mon Dec 17 18:16:29 UTC 2018
Am 17.12.18 um 17:57 schrieb Grodzovsky, Andrey:
>
> On 12/17/2018 10:27 AM, Christian König wrote:
>> Am 10.12.18 um 22:43 schrieb Andrey Grodzovsky:
>>> Decauple sched threads stop and start and ring mirror
>>> list handling from the policy of what to do about the
>>> guilty jobs.
>>> When stoppping the sched thread and detaching sched fences
>>> from non signaled HW fenes wait for all signaled HW fences
>>> to complete before rerunning the jobs.
>>>
>>> v2: Fix resubmission of guilty job into HW after refactoring.
>>>
>>> Suggested-by: Christian Koenig <Christian.Koenig at amd.com>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++--
>>> drivers/gpu/drm/etnaviv/etnaviv_sched.c | 8 +--
>>> drivers/gpu/drm/scheduler/sched_main.c | 110
>>> ++++++++++++++++++-----------
>>> drivers/gpu/drm/v3d/v3d_sched.c | 11 +--
>>> include/drm/gpu_scheduler.h | 10 ++-
>>> 5 files changed, 95 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index ef36cc5..42111d5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3292,17 +3292,16 @@ static int
>>> amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>> /* block all schedulers and reset given job's ring */
>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> struct amdgpu_ring *ring = adev->rings[i];
>>> + bool park_only = job && job->base.sched != &ring->sched;
>>> if (!ring || !ring->sched.thread)
>>> continue;
>>> - kthread_park(ring->sched.thread);
>>> + drm_sched_stop(&ring->sched, job ? &job->base : NULL,
>>> park_only);
>>> - if (job && job->base.sched != &ring->sched)
>>> + if (park_only)
>>> continue;
>>> - drm_sched_hw_job_reset(&ring->sched, job ? &job->base :
>>> NULL);
>>> -
>>> /* after all hw jobs are reset, hw fence is meaningless, so
>>> force_completion */
>>> amdgpu_fence_driver_force_completion(ring);
>>> }
>>> @@ -3445,6 +3444,7 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>>> struct amdgpu_job *job)
>>> {
>>> int i;
>>> + bool unpark_only;
>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> struct amdgpu_ring *ring = adev->rings[i];
>>> @@ -3456,10 +3456,13 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>>> * or all rings (in the case @job is NULL)
>>> * after above amdgpu_reset accomplished
>>> */
>>> - if ((!job || job->base.sched == &ring->sched) &&
>>> !adev->asic_reset_res)
>>> - drm_sched_job_recovery(&ring->sched);
>>> + unpark_only = (job && job->base.sched != &ring->sched) ||
>>> + adev->asic_reset_res;
>>> +
>>> + if (!unpark_only)
>>> + drm_sched_resubmit_jobs(&ring->sched);
>>> - kthread_unpark(ring->sched.thread);
>>> + drm_sched_start(&ring->sched, unpark_only);
>>> }
>>> if (!amdgpu_device_has_dc_support(adev)) {
>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> index 49a6763..fab3b51 100644
>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> @@ -109,16 +109,16 @@ static void etnaviv_sched_timedout_job(struct
>>> drm_sched_job *sched_job)
>>> }
>>> /* block scheduler */
>>> - kthread_park(gpu->sched.thread);
>>> - drm_sched_hw_job_reset(&gpu->sched, sched_job);
>>> + drm_sched_stop(&gpu->sched, sched_job, false);
>>> /* get the GPU back into the init state */
>>> etnaviv_core_dump(gpu);
>>> etnaviv_gpu_recover_hang(gpu);
>>> + drm_sched_resubmit_jobs(&gpu->sched);
>>> +
>>> /* restart scheduler after GPU is usable again */
>>> - drm_sched_job_recovery(&gpu->sched);
>>> - kthread_unpark(gpu->sched.thread);
>>> + drm_sched_start(&gpu->sched);
>>> }
>>> static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index dbb6906..cdf95e2 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -60,8 +60,6 @@
>>> static void drm_sched_process_job(struct dma_fence *f, struct
>>> dma_fence_cb *cb);
>>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job
>>> *s_job);
>>> -
>>> /**
>>> * drm_sched_rq_init - initialize a given run queue struct
>>> *
>>> @@ -342,13 +340,21 @@ static void drm_sched_job_timedout(struct
>>> work_struct *work)
>>> * @bad: bad scheduler job
>>> *
>>> */
>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct
>>> drm_sched_job *bad)
>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct
>>> drm_sched_job *bad,
>>> + bool park_only)
>>> {
>>> struct drm_sched_job *s_job;
>>> struct drm_sched_entity *entity, *tmp;
>>> unsigned long flags;
>>> + struct list_head wait_list;
>>> int i;
>>> + kthread_park(sched->thread);
>>> + if (park_only)
>>> + return;
>> Removing the callback needs to be done for all engines, not just the
>> one where the bad job is on.
> Is it because you assume that during full GPU reset we might kill
> healthy jobs who were still in progress in HW pipe and so we need to
> manually recover them
> just as we do with the guilty job's engine ?
Yes, exactly.
Christian.
>
> Andrey
>
>>> +
>>> + INIT_LIST_HEAD(&wait_list);
>>> +
>>> spin_lock_irqsave(&sched->job_list_lock, flags);
>>> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list,
>>> node) {
>>> if (s_job->s_fence->parent &&
>>> @@ -358,9 +364,24 @@ void drm_sched_hw_job_reset(struct
>>> drm_gpu_scheduler *sched, struct drm_sched_jo
>>> s_job->s_fence->parent = NULL;
>>> atomic_dec(&sched->hw_rq_count);
>>> }
>>> + else {
>>> + /* TODO Is it get/put neccessey here ? */
>>> + dma_fence_get(&s_job->s_fence->finished);
>>> + list_add(&s_job->finish_node, &wait_list);
>>> + }
>>> }
>>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> + /*
>>> + * Verify all the signaled jobs in mirror list are removed from
>>> the ring
>>> + * We rely on the fact that any finish_work in progress will
>>> wait for this
>>> + * handler to complete before releasing all of the jobs we iterate.
>>> + */
>>> + list_for_each_entry(s_job, &wait_list, finish_node) {
>>> + dma_fence_wait(&s_job->s_fence->finished, false);
>>> + dma_fence_put(&s_job->s_fence->finished);
>>> + }
>>> +
>> Increasing a bad jobs karma should be a separate function.
>>
>> Christian.
>>
>>> if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>>> atomic_inc(&bad->karma);
>>> /* don't increase @bad's karma if it's from KERNEL RQ,
>>> @@ -385,7 +406,7 @@ void drm_sched_hw_job_reset(struct
>>> drm_gpu_scheduler *sched, struct drm_sched_jo
>>> }
>>> }
>>> }
>>> -EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>> +EXPORT_SYMBOL(drm_sched_stop);
>>> /**
>>> * drm_sched_job_recovery - recover jobs after a reset
>>> @@ -393,33 +414,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>> * @sched: scheduler instance
>>> *
>>> */
>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
>>> {
>>> struct drm_sched_job *s_job, *tmp;
>>> - bool found_guilty = false;
>>> unsigned long flags;
>>> int r;
>>> - spin_lock_irqsave(&sched->job_list_lock, flags);
>>> + if (unpark_only)
>>> + goto unpark;
>>> +
>>> + spin_lock_irqsave(&sched->job_list_lock, flags);
>>> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>>> node) {
>>> struct drm_sched_fence *s_fence = s_job->s_fence;
>>> - struct dma_fence *fence;
>>> - uint64_t guilty_context;
>>> -
>>> - if (!found_guilty && atomic_read(&s_job->karma) >
>>> sched->hang_limit) {
>>> - found_guilty = true;
>>> - guilty_context = s_job->s_fence->scheduled.context;
>>> - }
>>> -
>>> - if (found_guilty && s_job->s_fence->scheduled.context ==
>>> guilty_context)
>>> - dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>> -
>>> - spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> - fence = sched->ops->run_job(s_job);
>>> - atomic_inc(&sched->hw_rq_count);
>>> + struct dma_fence *fence = s_job->s_fence->parent;
>>> if (fence) {
>>> - s_fence->parent = dma_fence_get(fence);
>>> r = dma_fence_add_callback(fence, &s_fence->cb,
>>> drm_sched_process_job);
>>> if (r == -ENOENT)
>>> @@ -427,18 +436,47 @@ void drm_sched_job_recovery(struct
>>> drm_gpu_scheduler *sched)
>>> else if (r)
>>> DRM_ERROR("fence add callback failed (%d)\n",
>>> r);
>>> - dma_fence_put(fence);
>>> - } else {
>>> - if (s_fence->finished.error < 0)
>>> - drm_sched_expel_job_unlocked(s_job);
>>> + } else
>>> drm_sched_process_job(NULL, &s_fence->cb);
>>> - }
>>> - spin_lock_irqsave(&sched->job_list_lock, flags);
>>> }
>>> +
>>> drm_sched_start_timeout(sched);
>>> spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +
>>> +unpark:
>>> + kthread_unpark(sched->thread);
>>> }
>>> -EXPORT_SYMBOL(drm_sched_job_recovery);
>>> +EXPORT_SYMBOL(drm_sched_start);
>>> +
>>> +/**
>>> + * drm_sched_resubmit_jobs - helper to relunch job from mirror ring
>>> list
>>> + *
>>> + * @sched: scheduler instance
>>> + *
>>> + */
>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
>>> +{
>>> + struct drm_sched_job *s_job, *tmp;
>>> + uint64_t guilty_context;
>>> + bool found_guilty = false;
>>> +
>>> + /*TODO DO we need spinlock here ? */
>>> + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>>> node) {
>>> + struct drm_sched_fence *s_fence = s_job->s_fence;
>>> +
>>> + if (!found_guilty && atomic_read(&s_job->karma) >
>>> sched->hang_limit) {
>>> + found_guilty = true;
>>> + guilty_context = s_job->s_fence->scheduled.context;
>>> + }
>>> +
>>> + if (found_guilty && s_job->s_fence->scheduled.context ==
>>> guilty_context)
>>> + dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>> +
>>> + s_job->s_fence->parent = sched->ops->run_job(s_job);
>>> + atomic_inc(&sched->hw_rq_count);
>>> + }
>>> +}
>>> +EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>>> /**
>>> * drm_sched_job_init - init a scheduler job
>>> @@ -634,26 +672,14 @@ static int drm_sched_main(void *param)
>>> DRM_ERROR("fence add callback failed (%d)\n",
>>> r);
>>> dma_fence_put(fence);
>>> - } else {
>>> - if (s_fence->finished.error < 0)
>>> - drm_sched_expel_job_unlocked(sched_job);
>>> + } else
>>> drm_sched_process_job(NULL, &s_fence->cb);
>>> - }
>>> wake_up(&sched->job_scheduled);
>>> }
>>> return 0;
>>> }
>>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job)
>>> -{
>>> - struct drm_gpu_scheduler *sched = s_job->sched;
>>> -
>>> - spin_lock(&sched->job_list_lock);
>>> - list_del_init(&s_job->node);
>>> - spin_unlock(&sched->job_list_lock);
>>> -}
>>> -
>>> /**
>>> * drm_sched_init - Init a gpu scheduler instance
>>> *
>>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
>>> b/drivers/gpu/drm/v3d/v3d_sched.c
>>> index 445b2ef..f99346a 100644
>>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>>> @@ -178,18 +178,19 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
>>> for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>> struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
>>> - kthread_park(sched->thread);
>>> - drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
>>> - sched_job : NULL));
>>> + drm_sched_stop(sched, (sched_job->sched == sched ?
>>> + sched_job : NULL), false);
>>> }
>>> /* get the GPU back into the init state */
>>> v3d_reset(v3d);
>>> + for (q = 0; q < V3D_MAX_QUEUES; q++)
>>> + drm_sched_resubmit_jobs(sched_job->sched);
>>> +
>>> /* Unblock schedulers and restart their jobs. */
>>> for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>> - drm_sched_job_recovery(&v3d->queue[q].sched);
>>> - kthread_unpark(v3d->queue[q].sched.thread);
>>> + drm_sched_start(&v3d->queue[q].sched, false);
>>> }
>>> mutex_unlock(&v3d->reset_lock);
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 47e1979..c94b592 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -175,6 +175,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct
>>> dma_fence *f);
>>> * finished to remove the job from the
>>> * @drm_gpu_scheduler.ring_mirror_list.
>>> * @node: used to append this struct to the
>>> @drm_gpu_scheduler.ring_mirror_list.
>>> + * @finish_node: used in a list to wait on before resetting the
>>> scheduler
>>> * @id: a unique id assigned to each job scheduled on the scheduler.
>>> * @karma: increment on every hang caused by this job. If this
>>> exceeds the hang
>>> * limit of the scheduler then the job is marked guilty and
>>> will not
>>> @@ -193,6 +194,7 @@ struct drm_sched_job {
>>> struct dma_fence_cb finish_cb;
>>> struct work_struct finish_work;
>>> struct list_head node;
>>> + struct list_head finish_node;
>>> uint64_t id;
>>> atomic_t karma;
>>> enum drm_sched_priority s_priority;
>>> @@ -298,9 +300,11 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>> void *owner);
>>> void drm_sched_job_cleanup(struct drm_sched_job *job);
>>> void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
>>> - struct drm_sched_job *job);
>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched,
>>> + struct drm_sched_job *job,
>>> + bool park_only);
>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool
>>> unpark_only);
>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
>>> bool drm_sched_dependency_optimized(struct dma_fence* fence,
>>> struct drm_sched_entity *entity);
>>> void drm_sched_fault(struct drm_gpu_scheduler *sched);
More information about the dri-devel
mailing list