[PATCH] drm/amdgpu: allocate entities on demand

Christian König christian.koenig at amd.com
Tue Jan 28 12:18:33 UTC 2020


Patch is Reviewed-by: Christian König <christian.koenig at amd.com>

Regards,
Christian.

On 1/28/20 11:13 AM, Nirmoy wrote:
> Gentle reminder!
>
> On 1/24/20 5:31 PM, Nirmoy Das wrote:
>> Currently we pre-allocate entities and fences for all the HW IPs on
>> context creation, even though some of them might never be used.
>>
>> This patch avoids that waste by creating an entity (together with its
>> fences) only when it is actually needed.
>>
>> v2: allocate memory for entity and fences together
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |   6 +-
>>   2 files changed, 124 insertions(+), 117 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 05c2af61e7de..94a6c42f29ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>>       [AMDGPU_HW_IP_VCN_JPEG]    =    1,
>>   };
>>
>> -static int amdgpu_ctx_total_num_entities(void)
>> -{
>> -    unsigned i, num_entities = 0;
>> -
>> -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>> -        num_entities += amdgpu_ctx_num_entities[i];
>> -
>> -    return num_entities;
>> -}
>> -
>>   static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>                         enum drm_sched_priority priority)
>>   {
>> +    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> +        return -EINVAL;
>> +
>>       /* NORMAL and below are accessible by everyone */
>>       if (priority <= DRM_SCHED_PRIORITY_NORMAL)
>>           return 0;
>> @@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>       return -EACCES;
>>   }
>>
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> -               enum drm_sched_priority priority,
>> -               struct drm_file *filp,
>> -               struct amdgpu_ctx *ctx)
>> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
>>   {
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>> -    unsigned i, j;
>> +    struct amdgpu_device *adev = ctx->adev;
>> +    struct amdgpu_ctx_entity *entity;
>> +    struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
>> +    unsigned num_scheds = 0;
>> +    enum drm_sched_priority priority;
>>       int r;
>>
>> -    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> -        return -EINVAL;
>> -
>> -    r = amdgpu_ctx_priority_permit(filp, priority);
>> -    if (r)
>> -        return r;
>> -
>> -    memset(ctx, 0, sizeof(*ctx));
>> -    ctx->adev = adev;
>> -
>> -
>> -    ctx->entities[0] = kcalloc(num_entities,
>> -                   sizeof(struct amdgpu_ctx_entity),
>> -                   GFP_KERNEL);
>> -    if (!ctx->entities[0])
>> -        return -ENOMEM;
>> -
>> +    entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
>> +             GFP_KERNEL);
>> +    if (!entity)
>> +        return  -ENOMEM;
>>
>> -    for (i = 0; i < num_entities; ++i) {
>> -        struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> -        entity->sequence = 1;
>> -        entity->fences = kcalloc(amdgpu_sched_jobs,
>> -                     sizeof(struct dma_fence*), GFP_KERNEL);
>> -        if (!entity->fences) {
>> -            r = -ENOMEM;
>> -            goto error_cleanup_memory;
>> -        }
>> -    }
>> -    for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>> -        ctx->entities[i] = ctx->entities[i - 1] +
>> -            amdgpu_ctx_num_entities[i - 1];
>> -
>> -    kref_init(&ctx->refcount);
>> -    spin_lock_init(&ctx->ring_lock);
>> -    mutex_init(&ctx->lock);
>> -
>> -    ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> -    ctx->reset_counter_query = ctx->reset_counter;
>> -    ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> -    ctx->init_priority = priority;
>> -    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> -
>> -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> -        struct drm_gpu_scheduler **scheds;
>> -        struct drm_gpu_scheduler *sched;
>> -        unsigned num_scheds = 0;
>> -
>> -        switch (i) {
>> +    entity->sequence = 1;
>> +    priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>> +                ctx->init_priority : ctx->override_priority;
>> +    switch (hw_ip) {
>>           case AMDGPU_HW_IP_GFX:
>>               sched = &adev->gfx.gfx_ring[0].sched;
>>               scheds = &sched;
>> @@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>               scheds = adev->jpeg.jpeg_sched;
>>               num_scheds =  adev->jpeg.num_jpeg_sched;
>>               break;
>> -        }
>> -
>> -        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> -            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> -                          priority, scheds,
>> -                          num_scheds, &ctx->guilty);
>> -        if (r)
>> -            goto error_cleanup_entities;
>>       }
>>
>> +    r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
>> +                  &ctx->guilty);
>> +    if (r)
>> +        goto error_free_entity;
>> +
>> +    ctx->entities[hw_ip][ring] = entity;
>>       return 0;
>>
>> -error_cleanup_entities:
>> -    for (i = 0; i < num_entities; ++i)
>> -        drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> +error_free_entity:
>> +    kfree(entity);
>>
>> -error_cleanup_memory:
>> -    for (i = 0; i < num_entities; ++i) {
>> -        struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> +    return r;
>> +}
>>
>> -        kfree(entity->fences);
>> -        entity->fences = NULL;
>> -    }
>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> +               enum drm_sched_priority priority,
>> +               struct drm_file *filp,
>> +               struct amdgpu_ctx *ctx)
>> +{
>> +    int r;
>>
>> -    kfree(ctx->entities[0]);
>> -    ctx->entities[0] = NULL;
>> -    return r;
>> +    r = amdgpu_ctx_priority_permit(filp, priority);
>> +    if (r)
>> +        return r;
>> +
>> +    memset(ctx, 0, sizeof(*ctx));
>> +
>> +    ctx->adev = adev;
>> +
>> +    kref_init(&ctx->refcount);
>> +    spin_lock_init(&ctx->ring_lock);
>> +    mutex_init(&ctx->lock);
>> +
>> +    ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> +    ctx->reset_counter_query = ctx->reset_counter;
>> +    ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> +    ctx->init_priority = priority;
>> +    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> +
>> +    return 0;
>> +
>> +}
>> +
>> +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
>> +{
>> +
>> +    int i;
>> +
>> +    if (!entity)
>> +        return;
>> +
>> +    for (i = 0; i < amdgpu_sched_jobs; ++i)
>> +        dma_fence_put(entity->fences[i]);
>> +
>> +    kfree(entity);
>>   }
>>
>>   static void amdgpu_ctx_fini(struct kref *ref)
>>   {
>>       struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>       struct amdgpu_device *adev = ctx->adev;
>>       unsigned i, j;
>>
>>       if (!adev)
>>           return;
>>
>> -    for (i = 0; i < num_entities; ++i) {
>> -        struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> -        for (j = 0; j < amdgpu_sched_jobs; ++j)
>> -            dma_fence_put(entity->fences[j]);
>> -
>> -        kfree(entity->fences);
>> +    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> +        for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
>> +            amdgpu_ctx_fini_entity(ctx->entities[i][j]);
>> +            ctx->entities[i][j] = NULL;
>> +        }
>>       }
>>
>> -    kfree(ctx->entities[0]);
>>       mutex_destroy(&ctx->lock);
>> -
>>       kfree(ctx);
>>   }
>>
>>   int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>>                 u32 ring, struct drm_sched_entity **entity)
>>   {
>> +    int r;
>> +
>>       if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>           DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>           return -EINVAL;
>> @@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>>           return -EINVAL;
>>       }
>>
>> -    *entity = &ctx->entities[hw_ip][ring].entity;
>> +    if (ctx->entities[hw_ip][ring] == NULL) {
>> +        r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
>> +        if (r)
>> +            return r;
>> +    }
>> +
>> +    *entity = &ctx->entities[hw_ip][ring]->entity;
>>       return 0;
>>   }
>>
>> @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>   static void amdgpu_ctx_do_release(struct kref *ref)
>>   {
>>       struct amdgpu_ctx *ctx;
>> -    unsigned num_entities;
>> -    u32 i;
>> +    u32 i, j;
>>
>>       ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> +    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> +        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> +            if (!ctx->entities[i][j])
>> +                continue;
>>
>> -    num_entities = amdgpu_ctx_total_num_entities();
>> -    for (i = 0; i < num_entities; i++)
>> -        drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> +            drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
>> +        }
>> +    }
>>
>>       amdgpu_ctx_fini(ref);
>>   }
>> @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>>   void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>>                     enum drm_sched_priority priority)
>>   {
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>       enum drm_sched_priority ctx_prio;
>> -    unsigned i;
>> +    unsigned i, j;
>>
>>       ctx->override_priority = priority;
>>
>>       ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>>               ctx->init_priority : ctx->override_priority;
>> +    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> +        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> +            struct drm_sched_entity *entity;
>>
>> -    for (i = 0; i < num_entities; i++) {
>> -        struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
>> +            if (!ctx->entities[i][j])
>> +                continue;
>>
>> -        drm_sched_entity_set_priority(entity, ctx_prio);
>> +            entity = &ctx->entities[i][j]->entity;
>> +            drm_sched_entity_set_priority(entity, ctx_prio);
>> +        }
>>       }
>>   }
>>
>> @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>>
>>   long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>>   {
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>       struct amdgpu_ctx *ctx;
>>       struct idr *idp;
>> -    uint32_t id, i;
>> +    uint32_t id, i, j;
>>
>>       idp = &mgr->ctx_handles;
>>
>>       mutex_lock(&mgr->lock);
>>       idr_for_each_entry(idp, ctx, id) {
>> -        for (i = 0; i < num_entities; i++) {
>> -            struct drm_sched_entity *entity;
>> +        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> +            for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> +                struct drm_sched_entity *entity;
>> +
>> +                if (!ctx->entities[i][j])
>> +                    continue;
>>
>> -            entity = &ctx->entities[0][i].entity;
>> -            timeout = drm_sched_entity_flush(entity, timeout);
>> +                entity = &ctx->entities[i][j]->entity;
>> +                timeout = drm_sched_entity_flush(entity, timeout);
>> +            }
>>           }
>>       }
>>       mutex_unlock(&mgr->lock);
>> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>>
>>   void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>>   {
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>       struct amdgpu_ctx *ctx;
>>       struct idr *idp;
>> -    uint32_t id, i;
>> +    uint32_t id, i, j;
>>
>>       idp = &mgr->ctx_handles;
>>
>> @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>>               continue;
>>           }
>>
>> -        for (i = 0; i < num_entities; i++)
>> -            drm_sched_entity_fini(&ctx->entities[0][i].entity);
>> +        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> +            for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> +                struct drm_sched_entity *entity;
>> +
>> +                if (!ctx->entities[i][j])
>> +                    continue;
>> +
>> +                entity = &ctx->entities[i][j]->entity;
>> +                drm_sched_entity_fini(entity);
>> +            }
>> +        }
>>       }
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index a6cd9d4b078c..de490f183af2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -29,10 +29,12 @@ struct drm_device;
>>   struct drm_file;
>>   struct amdgpu_fpriv;
>>
>> +#define AMDGPU_MAX_ENTITY_NUM 4
>> +
>>   struct amdgpu_ctx_entity {
>>       uint64_t        sequence;
>> -    struct dma_fence    **fences;
>>       struct drm_sched_entity    entity;
>> +    struct dma_fence    *fences[];
>>   };
>>
>>   struct amdgpu_ctx {
>> @@ -42,7 +44,7 @@ struct amdgpu_ctx {
>>       unsigned            reset_counter_query;
>>       uint32_t            vram_lost_counter;
>>       spinlock_t            ring_lock;
>> -    struct amdgpu_ctx_entity    *entities[AMDGPU_HW_IP_NUM];
>> +    struct amdgpu_ctx_entity    *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
>>       bool                preamble_presented;
>>       enum drm_sched_priority        init_priority;
>>       enum drm_sched_priority        override_priority;
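
Two C idioms carry the patch and are worth spelling out. First, struct amdgpu_ctx_entity
now ends in a flexible array member, and the single kcalloc() is sized with
offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), so one allocation covers the struct
and its trailing ring of fence pointers. Second, amdgpu_ctx_get_entity() populates a slot
lazily on first lookup instead of initializing every HW IP at context creation. The sketch
below shows the same shape as a self-contained user-space program; the names (ctx_entity,
ctx_get_entity, MAX_HW_IP, MAX_RINGS, num_jobs) are hypothetical stand-ins, calloc()/free()
replace kcalloc()/kfree(), and the scheduler plumbing is left out.

    #include <errno.h>    /* ENOMEM, EINVAL */
    #include <stddef.h>   /* offsetof */
    #include <stdio.h>
    #include <stdlib.h>   /* calloc, free */

    #define MAX_HW_IP 9   /* stand-in for AMDGPU_HW_IP_NUM */
    #define MAX_RINGS 4   /* stand-in for AMDGPU_MAX_ENTITY_NUM */

    struct fence;         /* opaque stand-in for struct dma_fence */

    struct ctx_entity {
            unsigned long sequence;
            struct fence *fences[];  /* flexible array member, sized at allocation time */
    };

    struct ctx {
            unsigned num_jobs;       /* stand-in for amdgpu_sched_jobs */
            struct ctx_entity *entities[MAX_HW_IP][MAX_RINGS];
    };

    /* One zeroed allocation covers the header plus num_jobs fence pointers;
     * the offsetof(..., fences[n]) sizing mirrors the v2 kcalloc() call. */
    static int ctx_init_entity(struct ctx *ctx, unsigned hw_ip, unsigned ring)
    {
            struct ctx_entity *entity;

            entity = calloc(1, offsetof(struct ctx_entity, fences[ctx->num_jobs]));
            if (!entity)
                    return -ENOMEM;

            entity->sequence = 1;
            ctx->entities[hw_ip][ring] = entity;
            return 0;
    }

    /* Lookup allocates on first use; later lookups return the cached slot. */
    static int ctx_get_entity(struct ctx *ctx, unsigned hw_ip, unsigned ring,
                              struct ctx_entity **entity)
    {
            if (hw_ip >= MAX_HW_IP || ring >= MAX_RINGS)
                    return -EINVAL;

            if (!ctx->entities[hw_ip][ring]) {
                    int r = ctx_init_entity(ctx, hw_ip, ring);
                    if (r)
                            return r;
            }

            *entity = ctx->entities[hw_ip][ring];
            return 0;
    }

    int main(void)
    {
            struct ctx ctx = { .num_jobs = 32 };  /* 32 fence slots per entity, like amdgpu */
            struct ctx_entity *e = NULL;

            if (ctx_get_entity(&ctx, 0, 0, &e) == 0)
                    printf("entity %p, sequence %lu\n", (void *)e, e->sequence);

            free(ctx.entities[0][0]);  /* teardown is one free per populated slot */
            return 0;
    }

Note that offsetof(..., fences[n]) with a runtime n is the GNU C idiom the kernel relies on:
it evaluates to the size of the header plus n pointer slots. That is why the separate fences
allocation, and the error unwinding it required, disappears from amdgpu_ctx_init(), and why
amdgpu_ctx_fini_entity() can tear a slot down with a single kfree().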


