[PATCH 4/4] drm/scheduler: do not keep a copy of sched list
Christian König
christian.koenig at amd.com
Mon Dec 9 15:37:25 UTC 2019
Yeah, that won't work very well.
During bring-up we also often have the case that we can't correctly
initialize all engines, e.g. only the first SDMA comes up etc.
So better stick with the initial approach of constructing the scheduler
array for each engine type which needs it.
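
E.g. for SDMA we can build the per-type list once during init and just
skip the instances which never came up. Rough sketch, sdma_sched_list
and num_sdma_scheds are made-up names:

        /* during device init, after the SDMA rings are set up;
         * sdma_sched_list/num_sdma_scheds are placeholders here */
        for (i = 0; i < adev->sdma.num_instances; ++i) {
                struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;

                /* instance never initialized, e.g. only SDMA0 came up */
                if (!ring->adev)
                        continue;

                adev->sdma_sched_list[adev->num_sdma_scheds++] = &ring->sched;
        }

That also keeps the filtering from 2a84e48e9712 ("drm/amdgpu: Only add
rqs for initialized rings"), just once per device instead of once per
context.
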
Regards,
Christian.
On 12/9/19 3:09 PM, Nirmoy wrote:
> I can see one issue with this. I am ignoring/removing changes from
>
> commit 2a84e48e9712ea8591a10dd59d59ccab3d54efd6 drm/amdgpu: Only add
> rqs for initialized rings.
>
> I wonder if we can handle that differently.
>
> Regards,
>
> Nirmoy
>
> On 12/9/19 2:56 PM, Nirmoy wrote:
>> Hi Christian,
>>
>> I got a different idea, a bit simpler. Let me know what you think
>> about it:
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 50bab33cba39..8de4de4f7a43 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -870,6 +870,7 @@ struct amdgpu_device {
>>         u64                             fence_context;
>>         unsigned                        num_rings;
>>         struct amdgpu_ring              *rings[AMDGPU_MAX_RINGS];
>> +       struct drm_gpu_scheduler        *rings_sched_list[AMDGPU_MAX_RINGS];
>>         bool                            ib_pool_ready;
>>         struct amdgpu_sa_manager        ring_tmp_bo;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 1d6850af9908..52b3a5d85a1d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -122,9 +122,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>
>>         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>                 struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>> -               struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
>> +               struct drm_gpu_scheduler **sched_list;
>>                 unsigned num_rings = 0;
>> -               unsigned num_rqs = 0;
>>
>>                 switch (i) {
>>                 case AMDGPU_HW_IP_GFX:
>> @@ -177,17 +176,11 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>                         break;
>>                 }
>>
>> -               for (j = 0; j < num_rings; ++j) {
>> -                       if (!rings[j]->adev)
>> -                               continue;
>> -
>> -                       sched_list[num_rqs++] = &rings[j]->sched;
>> -               }
>> -
>> +               sched_list = adev->rings_sched_list + rings[0]->idx;
>>                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>                         r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>                                                   priority, sched_list,
>> -                                                 num_rqs, &ctx->guilty);
>> +                                                 num_rings, &ctx->guilty);
>>                 if (r)
>>                         goto error_cleanup_entities;
>>         }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 377fe20bce23..e8cfa357e445 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -480,6 +480,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>>                                   ring->name);
>>                         return r;
>>                 }
>> +
>> +               adev->rings_sched_list[ring->idx] = &ring->sched;
>>         }
>>
>>         return 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> index bd9ed33bab43..bfe36199ffed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> @@ -1744,8 +1744,11 @@ static int sdma_v4_0_sw_init(void *handle)
>>                                      AMDGPU_SDMA_IRQ_INSTANCE0 + i);
>>                 if (r)
>>                         return r;
>> +       }
>> +
>> +       if (adev->sdma.has_page_queue) {
>> +               for (i = 0; i < adev->sdma.num_instances; i++) {
>>
>> -               if (adev->sdma.has_page_queue) {
>>                         ring = &adev->sdma.instance[i].page;
>>                         ring->ring_obj = NULL;
>>                         ring->use_doorbell = true;
>>
>> This relies on the rings of each type being initialized contiguously,
>> which is why I had to change sdma_v4_0.c so that we do
>> ring_init(sdma0, sdma1, page0, page1) instead of
>> ring_init(sdma0, page0, sdma1, page1), as illustrated below.
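>>
>> To illustrate (ring indices are made up), with the reordered init the
>> schedulers of one type end up adjacent in rings_sched_list:
>>
>>         idx:  ...   5      6      7      8     ...
>>               ...   sdma0  sdma1  page0  page1 ...
>>
>> so rings_sched_list + rings[0]->idx together with num_rings covers
>> exactly one type. With the old order (sdma0, page0, sdma1, page1) the
>> SDMA and page schedulers would interleave and the pointer arithmetic
>> above would pick up the wrong entries.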
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>> On 12/9/19 1:20 PM, Christian König wrote:
>>> Yes, you need to do this for the SDMA as well, but in general that
>>> looks like the idea I had in mind.
>>>
>>> I would do it like this:
>>>
>>> 1. Change the special case when you only get one scheduler for an
>>> entity to drop the pointer to the scheduler list (see the sketch
>>> after this list).
>>>     This way we always use the same scheduler for the entity and can
>>> pass in the array on the stack.
>>>
>>> 2. Change all callers which use more than one scheduler in the list
>>> to pass in pointers which are not allocated on the stack.
>>>     This obviously also means that we build the list of schedulers
>>> for each type only once during device init and not for each context
>>> init.
>>>
>>> 3. Make the scheduler list const and drop the kcalloc()/kfree() from
>>> the entity code.
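>>>
>>> For point 1 the relevant part of drm_sched_entity_init() would roughly
>>> become something like this (just a sketch, not tested):
>>>
>>>         /* only keep the list when the entity can actually switch
>>>          * schedulers, so callers with a single scheduler may pass the
>>>          * one-element array on the stack */
>>>         entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
>>>         entity->rq = &sched_list[0]->sched_rq[entity->priority];
>>>
>>> and drm_sched_entity_select_rq() would then need to bail out early
>>> when there is no list to pick from.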
>>>
>>> Regards,
>>> Christian.
>>>
>>> On 12/8/19 8:57 PM, Nirmoy wrote:
>>>>
>>>> On 12/6/19 8:41 PM, Christian König wrote:
>>>>> On 12/6/19 6:33 PM, Nirmoy Das wrote:
>>>>>> entity should not keep a copy of the sched list and maintain it
>>>>>> for itself.
>>>>>
>>>>> That is a good step, but we need to take this further.
>>>>
>>>> How about something like this?
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> index 0ae0a2715b0d..a71ee084b47a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>> @@ -269,8 +269,10 @@ struct amdgpu_gfx {
>>>>         bool                            me_fw_write_wait;
>>>>         bool                            cp_fw_write_wait;
>>>>         struct amdgpu_ring              gfx_ring[AMDGPU_MAX_GFX_RINGS];
>>>> +       struct drm_gpu_scheduler        *gfx_sched_list[AMDGPU_MAX_GFX_RINGS];
>>>>         unsigned                        num_gfx_rings;
>>>>         struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>>>> +       struct drm_gpu_scheduler        *compute_sched_list[AMDGPU_MAX_COMPUTE_RINGS];
>>>>         unsigned                        num_compute_rings;
>>>>         struct amdgpu_irq_src           eop_irq;
>>>>         struct amdgpu_irq_src           priv_reg_irq;
>>>>
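>>>> amdgpu_ctx_init() could then simply pick the per-type array in the
>>>> switch, roughly like this (sched_list as in the earlier diff;
>>>> num_scheds and the num_*_scheds counters are made-up names that would
>>>> be filled while the rings are set up):
>>>>
>>>>         case AMDGPU_HW_IP_GFX:
>>>>                 sched_list = adev->gfx.gfx_sched_list;
>>>>                 num_scheds = adev->gfx.num_gfx_scheds;
>>>>                 break;
>>>>         case AMDGPU_HW_IP_COMPUTE:
>>>>                 sched_list = adev->gfx.compute_sched_list;
>>>>                 num_scheds = adev->gfx.num_compute_scheds;
>>>>                 break;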
>>>>
>>>> Regards,
>>>>
>>>> Nirmoy
>>>>
>>>