[RFC PATCH] drm/amdgpu: allocate entities on demand

Nirmoy nirmodas at amd.com
Wed Dec 4 19:15:38 UTC 2019


I saw your email.

On 12/3/19 6:47 PM, Christian König wrote:
> Am 03.12.19 um 18:33 schrieb Christian König:
>> Am 03.12.19 um 16:02 schrieb Nirmoy:
>>> Hi Christian,
>>>
>>> On 12/2/19 3:59 PM, Christian König wrote:
>>>> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>>>>
>>>>> Do you mean something like
>>>>>
>>>>> diff --git a/include/drm/gpu_scheduler.h 
>>>>> b/include/drm/gpu_scheduler.h
>>>>> index 684692a8ed76..ac67f8f098fa 100644
>>>>> --- a/include/drm/gpu_scheduler.h
>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>>>>  struct drm_sched_entity {
>>>>>         struct list_head                list;
>>>>>         struct drm_sched_rq             *rq;
>>>>> -       struct drm_sched_rq             **rq_list;
>>>>> +      struct drm_gpu_scheduler        **sched;
>>>>>         unsigned int                    num_rq_list;
>>>>>         spinlock_t                      rq_lock;
>>>>
>>>> Yes, exactly. Problem is that I'm not 100% sure if that really 
>>>> works with all users of the rq_list.
>>>
>>> Currently, rq_list users do two main tasks.
>>>
>>> 1. change the rq priority for a context on user request
>>>
>>> 2. help the drm scheduler find the rq with the least load.
>>>
>>> Can you please check the diff below? It doesn't really work because
>>> I get a kernel panic. But do you think
>>>
>>> it matches your idea?
>>
>> Yes, that looks exactly like what I had in mind.
>
> BTW: What does the matching amdgpu change look like?
>
> Keep in mind that you can't allocate the list of schedulers on the 
> stack any more.
>
> That might be the reason for your kernel panic.

Just saw your email. I was scratching my head for a while because of
this memory corruption.

You are right, it was because of stack memory. I am keeping kcalloc in
drm_sched_entity_init().


Regards,

Nirmoy

>
> Christian.
>
>>
>> Christian.
>>
>>>
>>> test at install:~/linux> git diff 
>>> drivers/gpu/drm/scheduler/sched_entity.c |tee
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 1a5153197fe9..0bbd8ddd6c83 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -37,9 +37,9 @@
>>>   * submit to HW ring.
>>>   *
>>>   * @entity: scheduler entity to init
>>> - * @rq_list: the list of run queue on which jobs from this
>>> + * @sched_list: the list of drm scheds on which jobs from this
>>>   *           entity can be submitted
>>> - * @num_rq_list: number of run queue in rq_list
>>> + * @num_sched_list: number of drm sched in sched_list
>>>   * @guilty: atomic_t set to 1 when a job on this queue
>>>   *          is found to be guilty causing a timeout
>>>   *
>>> @@ -49,30 +49,24 @@
>>>   * Returns 0 on success or a negative error code on failure.
>>>   */
>>>  int drm_sched_entity_init(struct drm_sched_entity *entity,
>>> -              struct drm_sched_rq **rq_list,
>>> -              unsigned int num_rq_list,
>>> -              atomic_t *guilty)
>>> +              struct drm_gpu_scheduler **sched_list,
>>> +              unsigned int num_sched_list,
>>> +              atomic_t *guilty, enum drm_sched_priority priority)
>>>  {
>>> -    int i;
>>>
>>> -    if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
>>> +    if (!(entity && sched_list && (num_sched_list == 0 || 
>>> sched_list[0])))
>>>          return -EINVAL;
>>>
>>>      memset(entity, 0, sizeof(struct drm_sched_entity));
>>>      INIT_LIST_HEAD(&entity->list);
>>>      entity->rq = NULL;
>>>      entity->guilty = guilty;
>>> -    entity->num_rq_list = num_rq_list;
>>> -    entity->rq_list = kcalloc(num_rq_list, sizeof(struct 
>>> drm_sched_rq *),
>>> -                GFP_KERNEL);
>>> -    if (!entity->rq_list)
>>> -        return -ENOMEM;
>>> -
>>> -    for (i = 0; i < num_rq_list; ++i)
>>> -        entity->rq_list[i] = rq_list[i];
>>> +    entity->num_sched_list = num_sched_list;
>>> +    entity->sched_list =  sched_list
>>> +    entity->priority = priority;
>>>
>>> -    if (num_rq_list)
>>> -        entity->rq = rq_list[0];
>>> +    if (num_sched_list)
>>> +        entity->rq = 
>>> &entity->sched_list[0]->sched_rq[entity->priority];
>>>
>>>      entity->last_scheduled = NULL;
>>>
>>> @@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct 
>>> drm_sched_entity *entity)
>>>      unsigned int min_jobs = UINT_MAX, num_jobs;
>>>      int i;
>>>
>>> -    for (i = 0; i < entity->num_rq_list; ++i) {
>>> -        struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>>> +    for (i = 0; i < entity->num_sched_list; ++i) {
>>> +        struct drm_gpu_scheduler *sched = entity->sched_list[i];
>>>
>>> -        if (!entity->rq_list[i]->sched->ready) {
>>> +        if (!entity->sched_list[i]->ready) {
>>>              DRM_WARN("sched%s is not ready, skipping", sched->name);
>>>              continue;
>>>          }
>>> @@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct 
>>> drm_sched_entity *entity)
>>>          num_jobs = atomic_read(&sched->num_jobs);
>>>          if (num_jobs < min_jobs) {
>>>              min_jobs = num_jobs;
>>> -            rq = entity->rq_list[i];
>>> +            rq = &entity->sched_list[i]->sched_rq[entity->priority];
>>>          }
>>>      }
>>>
>>> @@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct 
>>> drm_sched_entity *entity)
>>>
>>>      dma_fence_put(entity->last_scheduled);
>>>      entity->last_scheduled = NULL;
>>> -    kfree(entity->rq_list);
>>>  }
>>>  EXPORT_SYMBOL(drm_sched_entity_fini);
>>>
>>> @@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct 
>>> drm_sched_entity *entity,
>>>      unsigned int i;
>>>
>>>      spin_lock(&entity->rq_lock);
>>> -
>>> -    for (i = 0; i < entity->num_rq_list; ++i)
>>> +//TODO
>>> +/*
>>> +    for (i = 0; i < entity->num_sched_list; ++i)
>>>  drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>>>
>>>      if (entity->rq) {
>>> @@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct 
>>> drm_sched_entity *entity,
>>>          drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>>          drm_sched_rq_add_entity(entity->rq, entity);
>>>      }
>>> -
>>> +*/
>>>      spin_unlock(&entity->rq_lock);
>>>  }
>>>  EXPORT_SYMBOL(drm_sched_entity_set_priority);
>>> @@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct 
>>> drm_sched_entity *entity)
>>>      struct dma_fence *fence;
>>>      struct drm_sched_rq *rq;
>>>
>>> -    if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list 
>>> <= 1)
>>> +    if (spsc_queue_count(&entity->job_queue) || 
>>> entity->num_sched_list <= 1)
>>>          return;
>>>
>>>      fence = READ_ONCE(entity->last_scheduled);
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7CNirmoy.Das%40amd.com%7C1949d715ef4d44e739a808d77818ee32%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637109920777492433&sdata=eOrmeKaUPvJLFL44w%2F6rFqU0KBo1lseA52%2FQNG9bMII%3D&reserved=0 
>>
>


More information about the amd-gfx mailing list