[RFC PATCH] drm/amdgpu: allocate entities on demand
Nirmoy
nirmodas at amd.com
Wed Dec 4 19:15:38 UTC 2019
I saw y
On 12/3/19 6:47 PM, Christian König wrote:
> Am 03.12.19 um 18:33 schrieb Christian König:
>> Am 03.12.19 um 16:02 schrieb Nirmoy:
>>> Hi Christian,
>>>
>>> On 12/2/19 3:59 PM, Christian König wrote:
>>>> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>>>>
>>>>> Do you mean something like
>>>>>
>>>>> diff --git a/include/drm/gpu_scheduler.h
>>>>> b/include/drm/gpu_scheduler.h
>>>>> index 684692a8ed76..ac67f8f098fa 100644
>>>>> --- a/include/drm/gpu_scheduler.h
>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>>>> Â struct drm_sched_entity {
>>>>>        struct list_head               list;
>>>>>        struct drm_sched_rq            *rq;
>>>>> -      struct drm_sched_rq            **rq_list;
>>>>> +     struct drm_gpu_scheduler       **sched;
>>>>>        unsigned int                   num_rq_list;
>>>>>        spinlock_t                     rq_lock;
>>>>
>>>> Yes, exactly. Problem is that I'm not 100% sure if that really
>>>> works with all users of the rq_list.
>>>
>>> currently rq_list users does two main tasks.
>>>
>>> 1Â change rq priority for a context on user requests
>>>
>>> 2 helps drm scheduler to find rq with least load.
>>>
>>> Can you please check the bellow diff it doesn't really work because
>>> I get some kernel panic. But do you think
>>>
>>> it is matching your idea ?
>>
>> Yes, that looks exactly like what I had in mind.
>
> BTW: What does the matching amdgpu change look like?
>
> Keep in mind that you can't allocate the list of schedulers on the
> stack any more.
>
> That might be the reason for you kernel panic.
Just saw your email. I was scratching my head for a while because of
this memory corruption.
You are right it was because of stack memory. I am keeping kcalloc in
drm_sched_entity_init();
Regards,
Nirmoy
>
> Christian.
>
>>
>> Christian.
>>
>>>
>>> test at install:~/linux> git diff
>>> drivers/gpu/drm/scheduler/sched_entity.c |tee
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 1a5153197fe9..0bbd8ddd6c83 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -37,9 +37,9 @@
>>> Â * submit to HW ring.
>>> Â *
>>> Â * @entity: scheduler entity to init
>>> - * @rq_list: the list of run queue on which jobs from this
>>> + * @sched_list: the list of drm scheds on which jobs from this
>>> Â *Â Â Â Â Â Â Â Â Â Â entity can be submitted
>>> - * @num_rq_list: number of run queue in rq_list
>>> + * @num_sched_list: number of drm sched in sched_list
>>> Â * @guilty: atomic_t set to 1 when a job on this queue
>>> Â *Â Â Â Â Â Â Â Â Â is found to be guilty causing a timeout
>>> Â *
>>> @@ -49,30 +49,24 @@
>>> Â * Returns 0 on success or a negative error code on failure.
>>> Â */
>>> Â int drm_sched_entity_init(struct drm_sched_entity *entity,
>>> -Â Â Â Â Â Â Â Â Â Â struct drm_sched_rq **rq_list,
>>> -Â Â Â Â Â Â Â Â Â Â unsigned int num_rq_list,
>>> -Â Â Â Â Â Â Â Â Â Â atomic_t *guilty)
>>> +Â Â Â Â Â Â Â Â Â Â struct drm_gpu_scheduler **sched_list,
>>> +Â Â Â Â Â Â Â Â Â Â unsigned int num_sched_list,
>>> +Â Â Â Â Â Â Â Â Â Â atomic_t *guilty, enum drm_sched_priority priority)
>>> Â {
>>> -Â Â Â int i;
>>>
>>> -Â Â Â if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
>>> +Â Â Â if (!(entity && sched_list && (num_sched_list == 0 ||
>>> sched_list[0])))
>>> Â Â Â Â Â Â Â return -EINVAL;
>>>
>>> Â Â Â Â memset(entity, 0, sizeof(struct drm_sched_entity));
>>> Â Â Â Â INIT_LIST_HEAD(&entity->list);
>>> Â Â Â Â entity->rq = NULL;
>>> Â Â Â Â entity->guilty = guilty;
>>> -Â Â Â entity->num_rq_list = num_rq_list;
>>> -Â Â Â entity->rq_list = kcalloc(num_rq_list, sizeof(struct
>>> drm_sched_rq *),
>>> -Â Â Â Â Â Â Â Â Â Â Â Â GFP_KERNEL);
>>> -Â Â Â if (!entity->rq_list)
>>> -Â Â Â Â Â Â return -ENOMEM;
>>> -
>>> -Â Â Â for (i = 0; i < num_rq_list; ++i)
>>> -Â Â Â Â Â Â entity->rq_list[i] = rq_list[i];
>>> +Â Â Â entity->num_sched_list = num_sched_list;
>>> +Â Â Â entity->sched_list =Â sched_list
>>> +Â Â Â entity->priority = priority;
>>>
>>> -Â Â Â if (num_rq_list)
>>> -Â Â Â Â Â Â entity->rq = rq_list[0];
>>> +Â Â Â if (num_sched_list)
>>> +Â Â Â Â Â Â entity->rq =
>>> &entity->sched_list[0]->sched_rq[entity->priority];
>>>
>>> Â Â Â Â entity->last_scheduled = NULL;
>>>
>>> @@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct
>>> drm_sched_entity *entity)
>>> Â Â Â Â unsigned int min_jobs = UINT_MAX, num_jobs;
>>> Â Â Â Â int i;
>>>
>>> -Â Â Â for (i = 0; i < entity->num_rq_list; ++i) {
>>> -Â Â Â Â Â Â struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>>> +Â Â Â for (i = 0; i < entity->num_sched_list; ++i) {
>>> +Â Â Â Â Â Â struct drm_gpu_scheduler *sched = entity->sched_list[i];
>>>
>>> -Â Â Â Â Â Â if (!entity->rq_list[i]->sched->ready) {
>>> +Â Â Â Â Â Â if (!entity->sched_list[i]->ready) {
>>> Â Â Â Â Â Â Â Â Â Â DRM_WARN("sched%s is not ready, skipping", sched->name);
>>> Â Â Â Â Â Â Â Â Â Â continue;
>>> Â Â Â Â Â Â Â }
>>> @@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct
>>> drm_sched_entity *entity)
>>> Â Â Â Â Â Â Â num_jobs = atomic_read(&sched->num_jobs);
>>> Â Â Â Â Â Â Â if (num_jobs < min_jobs) {
>>> Â Â Â Â Â Â Â Â Â Â min_jobs = num_jobs;
>>> -Â Â Â Â Â Â Â Â Â rq = entity->rq_list[i];
>>> +Â Â Â Â Â Â Â Â Â rq = &entity->sched_list[i]->sched_rq[entity->priority];
>>> Â Â Â Â Â Â Â }
>>> Â Â Â Â }
>>>
>>> @@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct
>>> drm_sched_entity *entity)
>>>
>>> Â Â Â Â dma_fence_put(entity->last_scheduled);
>>> Â Â Â Â entity->last_scheduled = NULL;
>>> -Â Â Â kfree(entity->rq_list);
>>> Â }
>>> Â EXPORT_SYMBOL(drm_sched_entity_fini);
>>>
>>> @@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct
>>> drm_sched_entity *entity,
>>> Â Â Â Â unsigned int i;
>>>
>>> Â Â Â Â spin_lock(&entity->rq_lock);
>>> -
>>> -Â Â Â for (i = 0; i < entity->num_rq_list; ++i)
>>> +//TODO
>>> +/*
>>> +Â Â Â for (i = 0; i < entity->num_sched_list; ++i)
>>> Â drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>>>
>>> Â Â Â Â if (entity->rq) {
>>> @@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct
>>> drm_sched_entity *entity,
>>> Â Â Â Â Â Â Â drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>> Â Â Â Â Â Â Â drm_sched_rq_add_entity(entity->rq, entity);
>>> Â Â Â Â }
>>> -
>>> +*/
>>> Â Â Â Â spin_unlock(&entity->rq_lock);
>>> Â }
>>> Â EXPORT_SYMBOL(drm_sched_entity_set_priority);
>>> @@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct
>>> drm_sched_entity *entity)
>>> Â Â Â Â struct dma_fence *fence;
>>> Â Â Â Â struct drm_sched_rq *rq;
>>>
>>> -Â Â Â if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list
>>> <= 1)
>>> +Â Â Â if (spsc_queue_count(&entity->job_queue) ||
>>> entity->num_sched_list <= 1)
>>> Â Â Â Â Â Â Â return;
>>>
>>> Â Â Â Â fence = READ_ONCE(entity->last_scheduled);
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CNirmoy.Das%40amd.com%7C1949d715ef4d44e739a808d77818ee32%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637109920777492433&sdata=eOrmeKaUPvJLFL44w%2F6rFqU0KBo1lseA52%2FQNG9bMII%3D&reserved=0
>>
>
More information about the amd-gfx
mailing list