[RFC PATCH] drm/amdgpu: allocate entities on demand
Nirmoy
nirmodas at amd.com
Tue Nov 26 14:41:59 UTC 2019
Thanks Christian for reviewing it. I will try to cleanup
drm_sched_entity_set_priority and come up with another patch.
On 11/26/19 10:45 AM, Christian König wrote:
> It looks like a start, but there are numerous things which need to be fixed.
>
> Question number one is: What's that good for? Entities are not the
> problem here. The real issue is the fence ring and the rq_list.
>
> The rq_list could actually be made constant since it should never be
> changed by the entity. It is only changed for backward compatibility
> in drm_sched_entity_set_priority().
>
> So I would start there and cleanup the drm_sched_entity_set_priority()
> to actually just set a new constant rq list instead.
>
> Then we could embed the fences in amdgpu_ctx_entity as dynamic array
> at the end of the structure.
>
> And last we can start to dynamic allocate and initialize the
> amdgpu_ctx_entity() structures.
>
> Regards,
> Christian.
>
> Am 26.11.19 um 00:31 schrieb Nirmoy:
>> Ran amdgpu_test(drm) successfully multiple times to test this. But I
>> am pretty sure I am missing some corner case here.
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>> On 11/26/19 12:17 AM, Nirmoy Das wrote:
>>> Currently we pre-allocate entities for all the HW IPs on
>>> context creation, and some of which might never be used.
>>>
>>> This patch tries to resolve entity wastage by creating entities
>>> for a HW IP only when it is required.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142
>>> ++++++++++++++----------
>>> 1 file changed, 81 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index a0d3d7b756eb..0a390ebe873f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>> struct amdgpu_ctx *ctx)
>>> {
>>> unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> - unsigned i, j, k;
>>> + unsigned i;
>>> int r;
>>> if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>> for (i = 0; i < num_entities; ++i) {
>>> struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>> - entity->sequence = 1;
>>> + entity->sequence = -1;
>>> entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>> }
>>> for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>>> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct
>>> amdgpu_device *adev,
>>> ctx->init_priority = priority;
>>> ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>> - struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> - struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> - unsigned num_rings = 0;
>>> - unsigned num_rqs = 0;
>>> + return 0;
>>> +
>>> +error_free_fences:
>>> + kfree(ctx->fences);
>>> + ctx->fences = NULL;
>>> + return r;
>>> +}
>>> +
>>> +static void amdgpu_ctx_fini(struct kref *ref)
>>> +{
>>> + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>>> refcount);
>>> + unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> + struct amdgpu_device *adev = ctx->adev;
>>> + unsigned i, j;
>>> +
>>> + if (!adev)
>>> + return;
>>> +
>>> + for (i = 0; i < num_entities; ++i)
>>> + for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> + dma_fence_put(ctx->entities[0][i].fences[j]);
>>> + kfree(ctx->fences);
>>> + kfree(ctx->entities[0]);
>>> +
>>> + mutex_destroy(&ctx->lock);
>>> +
>>> + kfree(ctx);
>>> +}
>>> - switch (i) {
>>> +
>>> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
>> This should be a static function which I forgot to change
>>> +{
>>> + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> + struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> + struct amdgpu_device *adev = ctx->adev;
>>> + unsigned num_rings = 0;
>>> + unsigned num_rqs = 0;
>>> + unsigned i, j;
>>> + int r = 0;
>>> +
>>> + switch (hw_ip) {
>>> case AMDGPU_HW_IP_GFX:
>>> rings[0] = &adev->gfx.gfx_ring[0];
>>> num_rings = 1;
>>> break;
>>> case AMDGPU_HW_IP_COMPUTE:
>>> - for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>>> - rings[j] = &adev->gfx.compute_ring[j];
>>> + for (i = 0; i < adev->gfx.num_compute_rings; ++i)
>>> + rings[i] = &adev->gfx.compute_ring[i];
>>> num_rings = adev->gfx.num_compute_rings;
>>> break;
>>> case AMDGPU_HW_IP_DMA:
>>> - for (j = 0; j < adev->sdma.num_instances; ++j)
>>> - rings[j] = &adev->sdma.instance[j].ring;
>>> + for (i = 0; i < adev->sdma.num_instances; ++i)
>>> + rings[i] = &adev->sdma.instance[i].ring;
>>> num_rings = adev->sdma.num_instances;
>>> break;
>>> case AMDGPU_HW_IP_UVD:
>>> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct
>>> amdgpu_device *adev,
>>> num_rings = 1;
>>> break;
>>> case AMDGPU_HW_IP_VCN_DEC:
>>> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
>>> + rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>>> }
>>> break;
>>> case AMDGPU_HW_IP_VCN_ENC:
>>> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - for (k = 0; k < adev->vcn.num_enc_rings; ++k)
>>> - rings[num_rings++] =
>>> &adev->vcn.inst[j].ring_enc[k];
>>> + for (j = 0; j < adev->vcn.num_enc_rings; ++j)
>>> + rings[num_rings++] =
>>> &adev->vcn.inst[i].ring_enc[j];
>>> }
>>> break;
>>> case AMDGPU_HW_IP_VCN_JPEG:
>>> - for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
>>> + rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>>> }
>>> break;
>>> - }
>>> -
>>> - for (j = 0; j < num_rings; ++j) {
>>> - if (!rings[j]->adev)
>>> - continue;
>>> + }
>>> - rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
>>> - }
>>> + for (i = 0; i < num_rings; ++i) {
>>> + if (!rings[i]->adev)
>>> + continue;
>>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> - rqs, num_rqs, &ctx->guilty);
>>> - if (r)
>>> - goto error_cleanup_entities;
>>> + rqs[num_rqs++] =
>>> &rings[i]->sched.sched_rq[ctx->init_priority];
>>> }
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
>>> + rqs, num_rqs, &ctx->guilty);
>>> + if (r)
>>> + goto error_cleanup_entities;
>>> +
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + ctx->entities[hw_ip][i].sequence = 1;
>>> +
>>> return 0;
>>> error_cleanup_entities:
>>> - for (i = 0; i < num_entities; ++i)
>>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> - kfree(ctx->entities[0]);
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>>> -error_free_fences:
>>> - kfree(ctx->fences);
>>> - ctx->fences = NULL;
>>> return r;
>>> }
>>> -static void amdgpu_ctx_fini(struct kref *ref)
>>> -{
>>> - struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>>> refcount);
>>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> - struct amdgpu_device *adev = ctx->adev;
>>> - unsigned i, j;
>>> -
>>> - if (!adev)
>>> - return;
>>> -
>>> - for (i = 0; i < num_entities; ++i)
>>> - for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> - dma_fence_put(ctx->entities[0][i].fences[j]);
>>> - kfree(ctx->fences);
>>> - kfree(ctx->entities[0]);
>>> -
>>> - mutex_destroy(&ctx->lock);
>>> -
>>> - kfree(ctx);
>>> -}
>>> -
>>> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32
>>> instance,
>>> u32 ring, struct drm_sched_entity **entity)
>>> {
>>> + int r;
>>> +
>>> if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>> return -EINVAL;
>>> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx
>>> *ctx, u32 hw_ip, u32 instance,
>>> return -EINVAL;
>>> }
>>> + if (ctx->entities[hw_ip][ring].sequence == -1) {
>>> + r = amdgpu_ctx_init_entity(ctx, hw_ip);
>>> +
>>> + if (r)
>>> + return r;
>>> + }
>>> +
>>> *entity = &ctx->entities[hw_ip][ring].entity;
>>> return 0;
>>> }
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
More information about the amd-gfx
mailing list