[PATCH 2/2] drm/amdgpu: allocate entities on demand
Nirmoy
nirmodas at amd.com
Wed Jan 22 12:03:17 UTC 2020
On 1/22/20 12:39 PM, Christian König wrote:
> On 22.01.20 at 10:33, Nirmoy Das wrote:
>> Currently we pre-allocate entities and fences for all the HW IPs on
>> context creation, but some of them might never be used.
>>
>> This patch avoids that waste by creating the entities and fences for
>> a HW IP only when they are first required.
>>
>> v2: consolidated priority checking at amdgpu_ctx_priority_permit()
>
> Well, that is not really what I had in mind. See, almost all
> applications use only the first instance of a hw_ip.
>
> So what we should probably do is allocate entities on demand instead
> of whole hw_ips.
>
> Do you know what I mean?
Okay, I understand what you mean. I will rework this and get back to you.
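
Just so I'm sure I follow: something along the lines of the sketch
below? Completely untested, and a few things are my guesses: I'm
assuming ctx->entities[hw_ip] becomes a fixed array of entity
pointers, that amdgpu_ctx_init_entity() grows a ring argument, and
I've trimmed the scheduler switch to the GFX case:

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  u32 ring)
{
	struct amdgpu_device *adev = ctx->adev;
	struct drm_gpu_scheduler **scheds = NULL;
	struct drm_gpu_scheduler *sched = NULL;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority priority;
	unsigned num_scheds = 0;
	int r;

	/* Allocate exactly one entity instead of a whole hw_ip block */
	entity = kzalloc(sizeof(*entity), GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	entity->sequence = 1;
	entity->fences = kcalloc(amdgpu_sched_jobs,
				 sizeof(struct dma_fence *), GFP_KERNEL);
	if (!entity->fences) {
		r = -ENOMEM;
		goto error_free_entity;
	}

	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
		sched = &adev->gfx.gfx_ring[0].sched;
		scheds = &sched;
		num_scheds = 1;
		break;
	/* ... the other IPs exactly as in the patch ... */
	}

	r = drm_sched_entity_init(&entity->entity, priority, scheds,
				  num_scheds, &ctx->guilty);
	if (r)
		goto error_free_fences;

	ctx->entities[hw_ip][ring] = entity;
	return 0;

error_free_fences:
	kfree(entity->fences);
error_free_entity:
	kfree(entity);
	return r;
}

and then in amdgpu_ctx_get_entity(), after the range checks:

	if (!ctx->entities[hw_ip][ring]) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring]->entity;

That way a process that only ever submits to gfx ring 0 pays for a
single entity and fence array.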
Regards,
Nirmoy
>
> Regards,
> Christian.
>
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 +++++++++++++-----------
>> 1 file changed, 87 insertions(+), 73 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index eecbd68db986..d704e1bebb1b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -45,6 +45,9 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>> enum drm_sched_priority priority)
>> {
>> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> + return -EINVAL;
>> +
>> /* NORMAL and below are accessible by everyone */
>> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
>> return 0;
>> @@ -58,65 +61,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>> return -EACCES;
>> }
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> - enum drm_sched_priority priority,
>> - struct drm_file *filp,
>> - struct amdgpu_ctx *ctx)
>> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip)
>> {
>> - unsigned i, j;
>> - int r;
>> + struct amdgpu_device *adev = ctx->adev;
>> + struct drm_gpu_scheduler **scheds;
>> + struct drm_gpu_scheduler *sched;
>> + unsigned num_scheds = 0;
>> + enum drm_sched_priority priority;
>> + int j, r;
>> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> - return -EINVAL;
>> + ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
>> + sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
>> - r = amdgpu_ctx_priority_permit(filp, priority);
>> - if (r)
>> - return r;
>> + if (!ctx->entities[hw_ip])
>> + return -ENOMEM;
>> - memset(ctx, 0, sizeof(*ctx));
>> - ctx->adev = adev;
>> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
>> - sizeof(struct amdgpu_ctx_entity),
>> - GFP_KERNEL);
>> + struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>> - if (!ctx->entities[0]) {
>> - r = -ENOMEM;
>> - goto error_cleanup_entity_memory;
>> + entity->sequence = 1;
>> + entity->fences = kcalloc(amdgpu_sched_jobs,
>> + sizeof(struct dma_fence*), GFP_KERNEL);
>> + if (!entity->fences) {
>> + r = -ENOMEM;
>> + goto error_cleanup_memory;
>> }
>> }
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>> -
>> - entity->sequence = 1;
>> - entity->fences = kcalloc(amdgpu_sched_jobs,
>> - sizeof(struct dma_fence*), GFP_KERNEL);
>> - if (!entity->fences) {
>> - r = -ENOMEM;
>> - goto error_cleanup_memory;
>> - }
>> - }
>> - }
>> -
>> - kref_init(&ctx->refcount);
>> - spin_lock_init(&ctx->ring_lock);
>> - mutex_init(&ctx->lock);
>> -
>> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> - ctx->reset_counter_query = ctx->reset_counter;
>> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> - ctx->init_priority = priority;
>> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> -
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - struct drm_gpu_scheduler **scheds;
>> - struct drm_gpu_scheduler *sched;
>> - unsigned num_scheds = 0;
>> -
>> - switch (i) {
>> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>> + ctx->init_priority : ctx->override_priority;
>> + switch (hw_ip) {
>> case AMDGPU_HW_IP_GFX:
>> sched = &adev->gfx.gfx_ring[0].sched;
>> scheds = &sched;
>> @@ -157,12 +132,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> scheds = adev->jpeg.jpeg_sched;
>> num_scheds = adev->jpeg.num_jpeg_sched;
>> break;
>> - }
>> + }
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> - priority, scheds,
>> - num_scheds, &ctx->guilty);
>> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>> + r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
>> + priority, scheds,
>> + num_scheds, &ctx->guilty);
>> if (r)
>> goto error_cleanup_entities;
>> }
>> @@ -170,30 +145,51 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> return 0;
>> error_cleanup_entities:
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> - drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>> - }
>> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
>> error_cleanup_memory:
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>> + struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>> - kfree(entity->fences);
>> - entity->fences = NULL;
>> - }
>> + kfree(entity->fences);
>> + entity->fences = NULL;
>> }
>> -error_cleanup_entity_memory:
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - kfree(ctx->entities[i]);
>> - ctx->entities[i] = NULL;
>> - }
>> + kfree(ctx->entities[hw_ip]);
>> + ctx->entities[hw_ip] = NULL;
>> return r;
>> }
>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> + enum drm_sched_priority priority,
>> + struct drm_file *filp,
>> + struct amdgpu_ctx *ctx)
>> +{
>> + int r;
>> +
>> + r = amdgpu_ctx_priority_permit(filp, priority);
>> + if (r)
>> + return r;
>> +
>> + memset(ctx, 0, sizeof(*ctx));
>> + ctx->adev = adev;
>> +
>> + kref_init(&ctx->refcount);
>> + spin_lock_init(&ctx->ring_lock);
>> + mutex_init(&ctx->lock);
>> +
>> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> + ctx->reset_counter_query = ctx->reset_counter;
>> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> + ctx->init_priority = priority;
>> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> +
>> + return 0;
>> +
>> +}
>> +
>> static void amdgpu_ctx_fini(struct kref *ref)
>> {
>> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> @@ -204,7 +200,14 @@ static void amdgpu_ctx_fini(struct kref *ref)
>> return;
>> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>> + struct amdgpu_ctx_entity *entity;
>> +
>> + if (!ctx->entities[i])
>> + continue;
>> +
>> + entity = &ctx->entities[i][j];
>> + if (!entity->fences)
>> + continue;
>> for (k = 0; k < amdgpu_sched_jobs; ++k)
>> dma_fence_put(entity->fences[k]);
>> @@ -241,6 +244,9 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>> return -EINVAL;
>> }
>> + if (ctx->entities[hw_ip] == NULL &&
>> + amdgpu_ctx_init_entity(ctx, hw_ip))
>> + return -ENOMEM;
>> *entity = &ctx->entities[hw_ip][ring].entity;
>> return 0;
>> }
>> @@ -285,8 +291,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>> ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + if (!ctx->entities[i])
>> + continue;
>> drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>> + }
>> }
>> amdgpu_ctx_fini(ref);
>> @@ -579,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> struct drm_sched_entity *entity;
>> + if (!ctx->entities[i])
>> + continue;
>> +
>> entity = &ctx->entities[i][j].entity;
>> timeout = drm_sched_entity_flush(entity, timeout);
>> }
>> @@ -601,11 +613,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>> DRM_ERROR("ctx %p is still alive\n", ctx);
>> continue;
>> }
>> -
>> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> struct drm_sched_entity *entity;
>> + if (!ctx->entities[i])
>> + continue;
>> +
>> entity = &ctx->entities[i][j].entity;
>> drm_sched_entity_fini(entity);
>> }
>