[RFC PATCH] drm/amdgpu: allocate entities on demand

Christian König ckoenig.leichtzumerken at gmail.com
Tue Nov 26 09:45:45 UTC 2019


It looks like a start, but there are numerous things which need to be fixed.

Question number one is: What's that good for? Entities are not the 
problem here. The real issue is the fence ring and the rq_list.

The rq_list could actually be made constant since it should never be 
changed by the entity. It is only changed for backward compatibility in 
drm_sched_entity_set_priority().

So I would start there and cleanup the drm_sched_entity_set_priority() 
to actually just set a new constant rq list instead.

Then we could embed the fences in amdgpu_ctx_entity as dynamic array at 
the end of the structure.

And lastly we can start to dynamically allocate and initialize the 
amdgpu_ctx_entity() structures.

Regards,
Christian.

Am 26.11.19 um 00:31 schrieb Nirmoy:
> Ran amdgpu_test(drm) successfully multiple times to test this. But I 
> am pretty sure I am missing some corner case here.
>
>
> Regards,
>
> Nirmoy
>
> On 11/26/19 12:17 AM, Nirmoy Das wrote:
>> Currently we pre-allocate entities for all the HW IPs on
>> context creation, some of which might never be used.
>>
>> This patch tries to resolve entity wastage by creating entities
>> for a HW IP only when it is required.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
>>   1 file changed, 81 insertions(+), 61 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index a0d3d7b756eb..0a390ebe873f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>                  struct amdgpu_ctx *ctx)
>>   {
>>       unsigned num_entities = amdgpu_ctx_total_num_entities();
>> -    unsigned i, j, k;
>> +    unsigned i;
>>       int r;
>>         if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>       for (i = 0; i < num_entities; ++i) {
>>           struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>   -        entity->sequence = 1;
>> +        entity->sequence = -1;
>>           entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>       }
>>       for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>       ctx->init_priority = priority;
>>       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>   -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> -        struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>> -        struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>> -        unsigned num_rings = 0;
>> -        unsigned num_rqs = 0;
>> +    return 0;
>> +
>> +error_free_fences:
>> +    kfree(ctx->fences);
>> +    ctx->fences = NULL;
>> +    return r;
>> +}
>> +
>> +static void amdgpu_ctx_fini(struct kref *ref)
>> +{
>> +    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>> refcount);
>> +    unsigned num_entities = amdgpu_ctx_total_num_entities();
>> +    struct amdgpu_device *adev = ctx->adev;
>> +    unsigned i, j;
>> +
>> +    if (!adev)
>> +        return;
>> +
>> +    for (i = 0; i < num_entities; ++i)
>> +        for (j = 0; j < amdgpu_sched_jobs; ++j)
>> +            dma_fence_put(ctx->entities[0][i].fences[j]);
>> +    kfree(ctx->fences);
>> +    kfree(ctx->entities[0]);
>> +
>> +    mutex_destroy(&ctx->lock);
>> +
>> +    kfree(ctx);
>> +}
>>   -        switch (i) {
>> +
>> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
> This should be a static function which I forgot to change
>> +{
>> +    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>> +    struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>> +    struct amdgpu_device *adev = ctx->adev;
>> +    unsigned num_rings = 0;
>> +    unsigned num_rqs = 0;
>> +    unsigned i, j;
>> +    int r = 0;
>> +
>> +    switch (hw_ip) {
>>           case AMDGPU_HW_IP_GFX:
>>               rings[0] = &adev->gfx.gfx_ring[0];
>>               num_rings = 1;
>>               break;
>>           case AMDGPU_HW_IP_COMPUTE:
>> -            for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>> -                rings[j] = &adev->gfx.compute_ring[j];
>> +            for (i = 0; i < adev->gfx.num_compute_rings; ++i)
>> +                rings[i] = &adev->gfx.compute_ring[i];
>>               num_rings = adev->gfx.num_compute_rings;
>>               break;
>>           case AMDGPU_HW_IP_DMA:
>> -            for (j = 0; j < adev->sdma.num_instances; ++j)
>> -                rings[j] = &adev->sdma.instance[j].ring;
>> +            for (i = 0; i < adev->sdma.num_instances; ++i)
>> +                rings[i] = &adev->sdma.instance[i].ring;
>>               num_rings = adev->sdma.num_instances;
>>               break;
>>           case AMDGPU_HW_IP_UVD:
>> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>               num_rings = 1;
>>               break;
>>           case AMDGPU_HW_IP_VCN_DEC:
>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
>> +                rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>>               }
>>               break;
>>           case AMDGPU_HW_IP_VCN_ENC:
>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                for (k = 0; k < adev->vcn.num_enc_rings; ++k)
>> -                    rings[num_rings++] = 
>> &adev->vcn.inst[j].ring_enc[k];
>> +                for (j = 0; j < adev->vcn.num_enc_rings; ++j)
>> +                    rings[num_rings++] = 
>> &adev->vcn.inst[i].ring_enc[j];
>>               }
>>               break;
>>           case AMDGPU_HW_IP_VCN_JPEG:
>> -            for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
>> +                rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>>               }
>>               break;
>> -        }
>> -
>> -        for (j = 0; j < num_rings; ++j) {
>> -            if (!rings[j]->adev)
>> -                continue;
>> +    }
>>   -            rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
>> -        }
>> +    for (i = 0; i < num_rings; ++i) {
>> +        if (!rings[i]->adev)
>> +            continue;
>>   -        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> -            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> -                          rqs, num_rqs, &ctx->guilty);
>> -        if (r)
>> -            goto error_cleanup_entities;
>> +        rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
>>       }
>>   +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> +        r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
>> +                rqs, num_rqs, &ctx->guilty);
>> +    if (r)
>> +        goto error_cleanup_entities;
>> +
>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> +        ctx->entities[hw_ip][i].sequence = 1;
>> +
>>       return 0;
>>     error_cleanup_entities:
>> -    for (i = 0; i < num_entities; ++i)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> -    kfree(ctx->entities[0]);
>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>>   -error_free_fences:
>> -    kfree(ctx->fences);
>> -    ctx->fences = NULL;
>>       return r;
>>   }
>>   -static void amdgpu_ctx_fini(struct kref *ref)
>> -{
>> -    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>> refcount);
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>> -    struct amdgpu_device *adev = ctx->adev;
>> -    unsigned i, j;
>> -
>> -    if (!adev)
>> -        return;
>> -
>> -    for (i = 0; i < num_entities; ++i)
>> -        for (j = 0; j < amdgpu_sched_jobs; ++j)
>> -            dma_fence_put(ctx->entities[0][i].fences[j]);
>> -    kfree(ctx->fences);
>> -    kfree(ctx->entities[0]);
>> -
>> -    mutex_destroy(&ctx->lock);
>> -
>> -    kfree(ctx);
>> -}
>> -
>>   int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 
>> instance,
>>                 u32 ring, struct drm_sched_entity **entity)
>>   {
>> +    int r;
>> +
>>       if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>           DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>           return -EINVAL;
>> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx 
>> *ctx, u32 hw_ip, u32 instance,
>>           return -EINVAL;
>>       }
>>   +    if (ctx->entities[hw_ip][ring].sequence == -1) {
>> +        r = amdgpu_ctx_init_entity(ctx, hw_ip);
>> +
>> +        if (r)
>> +            return r;
>> +    }
>> +
>>       *entity = &ctx->entities[hw_ip][ring].entity;
>>       return 0;
>>   }
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



More information about the amd-gfx mailing list