[PATCH 4/4] drm/scheduler: do not keep a copy of sched list

Mon Dec 9 14:09:10 UTC 2019

I can seeÂ  one issue with this. I am ignoring/removing changes from

commit 2a84e48e9712ea8591a10dd59d59ccab3d54efd6 drm/amdgpu: Only add rqs 
for initialized rings.

I wonder if we can handle that differently.

Regards,

Nirmoy

On 12/9/19 2:56 PM, Nirmoy wrote:
> Hi Christian,
>
> I got a different idea, a bit more simple let me know what do you 
> think about it:
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 50bab33cba39..8de4de4f7a43 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -870,6 +870,7 @@ struct amdgpu_device {
> Â Â Â Â Â Â Â  u64Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  fence_context;
> Â Â Â Â Â Â Â  unsignedÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  num_rings;
> Â Â Â Â Â Â Â  struct amdgpu_ringÂ Â Â Â Â Â Â Â Â Â Â Â Â  *rings[AMDGPU_MAX_RINGS];
> +Â Â Â Â Â  struct drm_gpu_scheduler *rings_sched_list[AMDGPU_MAX_RINGS];
> Â Â Â Â Â Â Â  boolÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ib_pool_ready;
> Â Â Â Â Â Â Â  struct amdgpu_sa_managerÂ Â Â Â Â Â Â  ring_tmp_bo;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 1d6850af9908..52b3a5d85a1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -122,9 +122,8 @@ static int amdgpu_ctx_init(struct amdgpu_device 
> *adev,
>
> Â Â Â Â Â Â Â  for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â  struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS]
> +Â Â Â Â Â Â Â Â Â Â Â Â Â  struct drm_gpu_scheduler **sched_list;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned num_rings = 0;
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â  unsigned num_rqs = 0;
>
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  switch (i) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  case AMDGPU_HW_IP_GFX:
> @@ -177,17 +176,11 @@ static int amdgpu_ctx_init(struct amdgpu_device 
> *adev,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
>
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â  for (j = 0; j < num_rings; ++j) {
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (!rings[j]->adev)
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  continue;
> -
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sched_list[num_rqs++] = &rings[j]->sched;
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
> -
> +Â Â Â Â Â Â Â Â Â Â Â Â Â  sched_list= adev->rings_sched_list+rings[0]->idx;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  r = 
> drm_sched_entity_init(&ctx->entities[i][j].entity,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  priority, sched_list,
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  num_rqs, &ctx->guilty);
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  num_rings, 
> &ctx->guilty);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (r)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  goto error_cleanup_entities;
> Â Â Â Â Â Â Â  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 377fe20bce23..e8cfa357e445 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -480,6 +480,8 @@ int amdgpu_fence_driver_init_ring(struct 
> amdgpu_ring *ring,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ring->name);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return r;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
> +
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â  adev->rings_sched_list[ring->idx] = &ring->sched;
> Â Â Â Â Â Â Â  }
>
> Â Â Â Â Â Â Â  return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index bd9ed33bab43..bfe36199ffed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1744,8 +1744,11 @@ static int sdma_v4_0_sw_init(void *handle)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  AMDGPU_SDMA_IRQ_INSTANCE0 + i);
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (r)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return r;
> +Â Â Â Â Â Â  }
> +
> +Â Â Â Â Â Â  if (adev->sdma.has_page_queue) {
> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â  for (i = 0; i < adev->sdma.num_instances; i++) {
>
> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (adev->sdma.has_page_queue) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ring = &adev->sdma.instance[i].page;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ring->ring_obj = NULL;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ring->use_doorbell = true;
>
> It relies on contiguous ring initialization that's why I had to 
> changeÂ  sdma_v4_0.c so that we do ring_init(sdma0, sdma1, page0, page1}
>
> instead of ring_init{sdma0, page0, sdma1, page1}
>
>
> Regards,
>
> Nirmoy
>
> On 12/9/19 1:20 PM, Christian KÃ¶nig wrote:
>> Yes, you need to do this for the SDMA as well but in general that 
>> looks like the idea I had in mind as well.
>>
>> I would do it like this:
>>
>> 1. Change the special case when you only get one scheduler for an 
>> entity to drop the pointer to the scheduler list.
>> Â Â Â  This way we always use the same scheduler for the entity and can 
>> pass in the array on the stack.
>>
>> 2. Change all callers which use more than one scheduler in the list 
>> to pass in pointers which are not allocated on the stack.
>> Â Â Â  This obviously also means that we build the list of schedulers 
>> for each type only once during device init and not for each context 
>> init.
>>
>> 3. Make the scheduler list const and drop the kcalloc()/kfree() from 
>> the entity code.
>>
>> Regards,
>> Christian.
>>
>> Am 08.12.19 um 20:57 schrieb Nirmoy:
>>>
>>> On 12/6/19 8:41 PM, Christian KÃ¶nig wrote:
>>>> Am 06.12.19 um 18:33 schrieb Nirmoy Das:
>>>>> entity should not keep copy and maintain sched list for
>>>>> itself.
>>>>
>>>> That is a good step, but we need to take this further.
>>>
>>> How aboutÂ  something like ?
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 0ae0a2715b0d..a71ee084b47a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -269,8 +269,10 @@ struct amdgpu_gfx {
>>> Â Â Â Â Â Â Â  boolÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  me_fw_write_wait;
>>> Â Â Â Â Â Â Â  boolÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  cp_fw_write_wait;
>>> Â Â Â Â Â Â Â  struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
>>> +Â Â Â Â Â Â  struct drm_gpu_scheduler *gfx_sched_list[AMDGPU_MAX_GFX_RINGS];
>>> Â Â Â Â Â Â Â  unsignedÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  num_gfx_rings;
>>> Â Â Â Â Â Â Â  struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>>> +Â Â Â Â Â Â  struct drm_gpu_scheduler 
>>> *compute_sched_list[AMDGPU_MAX_COMPUTE_RINGS];
>>> Â Â Â Â Â Â Â  unsignedÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  num_compute_rings;
>>> Â Â Â Â Â Â Â  struct amdgpu_irq_srcÂ Â Â Â Â Â Â Â Â Â  eop_irq;
>>> Â Â Â Â Â Â Â  struct amdgpu_irq_srcÂ Â Â Â Â Â Â Â Â Â  priv_reg_irq;
>>>
>>>
>>> Regards,
>>>
>>> Nirmoy
>>>
>>