[PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.

Christian König ckoenig.leichtzumerken at gmail.com
Thu Feb 14 09:08:49 UTC 2019


Am 13.02.19 um 22:03 schrieb Alex Deucher via amd-gfx:
> On Wed, Jan 30, 2019 at 5:43 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> Am 30.01.19 um 02:53 schrieb Bas Nieuwenhuizen:
>>> Some blocks in amdgpu can have 0 rqs.
>>>
>>> Job creation already fails with -ENOENT when entity->rq is NULL,
>>> so jobs cannot be pushed. Without a rq there is no scheduler to
>>> pop jobs, and rq selection already does the right thing with a
>>> list of length 0.
>>>
>>> So the operations we need to fix are:
>>>     - On creation, do not set rq to rq_list[0], since the list may have length 0.
>>>     - Do not flush any jobs when there is no rq.
>>>     - On entity destruction, handle the rq == NULL case.
>>>     - On set_priority, do not try to change the rq if it is NULL.
>>>
>>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>> One minor comment on patch #2, apart from that the series is
>> Reviewed-by: Christian König <christian.koenig at amd.com>.
>>
>> I'm going to make the change on #2 and pick them up for inclusion in
>> amd-staging-drm-next.
> Hi Christian,
>
> I haven't seen these land yet.  Just want to make sure they don't fall
> through the cracks.

Thanks for the reminder; I'm really having trouble catching up on 
applying patches lately.

Christian.

>
> Alex
>
>> Thanks for the help,
>> Christian.
>>
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_entity.c | 39 ++++++++++++++++--------
>>>    1 file changed, 26 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 4463d3826ecb..8e31b6628d09 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -52,12 +52,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>    {
>>>        int i;
>>>
>>> -     if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
>>> +     if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
>>>                return -EINVAL;
>>>
>>>        memset(entity, 0, sizeof(struct drm_sched_entity));
>>>        INIT_LIST_HEAD(&entity->list);
>>> -     entity->rq = rq_list[0];
>>> +     entity->rq = NULL;
>>>        entity->guilty = guilty;
>>>        entity->num_rq_list = num_rq_list;
>>>        entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
>>> @@ -67,6 +67,10 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>
>>>        for (i = 0; i < num_rq_list; ++i)
>>>                entity->rq_list[i] = rq_list[i];
>>> +
>>> +     if (num_rq_list)
>>> +             entity->rq = rq_list[0];
>>> +
>>>        entity->last_scheduled = NULL;
>>>
>>>        spin_lock_init(&entity->rq_lock);
>>> @@ -165,6 +169,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>>>        struct task_struct *last_user;
>>>        long ret = timeout;
>>>
>>> +     if (!entity->rq)
>>> +             return 0;
>>> +
>>>        sched = entity->rq->sched;
>>>        /**
>>>         * The client will not queue more IBs during this fini, consume existing
>>> @@ -264,20 +271,24 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
>>>     */
>>>    void drm_sched_entity_fini(struct drm_sched_entity *entity)
>>>    {
>>> -     struct drm_gpu_scheduler *sched;
>>> +     struct drm_gpu_scheduler *sched = NULL;
>>>
>>> -     sched = entity->rq->sched;
>>> -     drm_sched_rq_remove_entity(entity->rq, entity);
>>> +     if (entity->rq) {
>>> +             sched = entity->rq->sched;
>>> +             drm_sched_rq_remove_entity(entity->rq, entity);
>>> +     }
>>>
>>>        /* Consumption of existing IBs wasn't completed. Forcefully
>>>         * remove them here.
>>>         */
>>>        if (spsc_queue_peek(&entity->job_queue)) {
>>> -             /* Park the kernel for a moment to make sure it isn't processing
>>> -              * our entity.
>>> -              */
>>> -             kthread_park(sched->thread);
>>> -             kthread_unpark(sched->thread);
>>> +             if (sched) {
>>> +                     /* Park the kernel for a moment to make sure it isn't processing
>>> +                      * our entity.
>>> +                      */
>>> +                     kthread_park(sched->thread);
>>> +                     kthread_unpark(sched->thread);
>>> +             }
>>>                if (entity->dependency) {
>>>                        dma_fence_remove_callback(entity->dependency,
>>>                                                  &entity->cb);
>>> @@ -362,9 +373,11 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>>        for (i = 0; i < entity->num_rq_list; ++i)
>>>                drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>>>
>>> -     drm_sched_rq_remove_entity(entity->rq, entity);
>>> -     drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>> -     drm_sched_rq_add_entity(entity->rq, entity);
>>> +     if (entity->rq) {
>>> +             drm_sched_rq_remove_entity(entity->rq, entity);
>>> +             drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>> +             drm_sched_rq_add_entity(entity->rq, entity);
>>> +     }
>>>
>>>        spin_unlock(&entity->rq_lock);
>>>    }
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


