[PATCH] drm/amdgpu: allocate entities on demand
Christian König
christian.koenig at amd.com
Tue Jan 28 12:18:33 UTC 2020
Patch is Reviewed-by: Christian König <christian.koenig at amd.com>
Regards,
Christian.
Am 28.01.20 um 11:13 schrieb Nirmoy:
> Gentle reminder !
>
> On 1/24/20 5:31 PM, Nirmoy Das wrote:
>> Currently we pre-allocate entities and fences for all the HW IPs on
>> context creation, some of which might never be used.
>>
>> This patch resolves entity/fence wastage by creating an entity
>> only when needed.
>>
>> v2: allocate memory for entity and fences together
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
>> 2 files changed, 124 insertions(+), 117 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 05c2af61e7de..94a6c42f29ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -42,19 +42,12 @@ const unsigned int
>> amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>> [AMDGPU_HW_IP_VCN_JPEG] = 1,
>> };
>> -static int amdgpu_ctx_total_num_entities(void)
>> -{
>> - unsigned i, num_entities = 0;
>> -
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>> - num_entities += amdgpu_ctx_num_entities[i];
>> -
>> - return num_entities;
>> -}
>> -
>> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>> enum drm_sched_priority priority)
>> {
>> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> + return -EINVAL;
>> +
>> /* NORMAL and below are accessible by everyone */
>> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
>> return 0;
>> @@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct
>> drm_file *filp,
>> return -EACCES;
>> }
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> - enum drm_sched_priority priority,
>> - struct drm_file *filp,
>> - struct amdgpu_ctx *ctx)
>> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32
>> hw_ip, const u32 ring)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> - unsigned i, j;
>> + struct amdgpu_device *adev = ctx->adev;
>> + struct amdgpu_ctx_entity *entity;
>> + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
>> + unsigned num_scheds = 0;
>> + enum drm_sched_priority priority;
>> int r;
>> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> - return -EINVAL;
>> -
>> - r = amdgpu_ctx_priority_permit(filp, priority);
>> - if (r)
>> - return r;
>> -
>> - memset(ctx, 0, sizeof(*ctx));
>> - ctx->adev = adev;
>> -
>> -
>> - ctx->entities[0] = kcalloc(num_entities,
>> - sizeof(struct amdgpu_ctx_entity),
>> - GFP_KERNEL);
>> - if (!ctx->entities[0])
>> - return -ENOMEM;
>> -
>> + entity = kcalloc(1, offsetof(typeof(*entity),
>> fences[amdgpu_sched_jobs]),
>> + GFP_KERNEL);
>> + if (!entity)
>> + return -ENOMEM;
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> - entity->sequence = 1;
>> - entity->fences = kcalloc(amdgpu_sched_jobs,
>> - sizeof(struct dma_fence*), GFP_KERNEL);
>> - if (!entity->fences) {
>> - r = -ENOMEM;
>> - goto error_cleanup_memory;
>> - }
>> - }
>> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>> - ctx->entities[i] = ctx->entities[i - 1] +
>> - amdgpu_ctx_num_entities[i - 1];
>> -
>> - kref_init(&ctx->refcount);
>> - spin_lock_init(&ctx->ring_lock);
>> - mutex_init(&ctx->lock);
>> -
>> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> - ctx->reset_counter_query = ctx->reset_counter;
>> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> - ctx->init_priority = priority;
>> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> -
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - struct drm_gpu_scheduler **scheds;
>> - struct drm_gpu_scheduler *sched;
>> - unsigned num_scheds = 0;
>> -
>> - switch (i) {
>> + entity->sequence = 1;
>> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>> + ctx->init_priority : ctx->override_priority;
>> + switch (hw_ip) {
>> case AMDGPU_HW_IP_GFX:
>> sched = &adev->gfx.gfx_ring[0].sched;
>> scheds = &sched;
>> @@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device
>> *adev,
>> scheds = adev->jpeg.jpeg_sched;
>> num_scheds = adev->jpeg.num_jpeg_sched;
>> break;
>> - }
>> -
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> - priority, scheds,
>> - num_scheds, &ctx->guilty);
>> - if (r)
>> - goto error_cleanup_entities;
>> }
>> + r = drm_sched_entity_init(&entity->entity, priority, scheds,
>> num_scheds,
>> + &ctx->guilty);
>> + if (r)
>> + goto error_free_entity;
>> +
>> + ctx->entities[hw_ip][ring] = entity;
>> return 0;
>> -error_cleanup_entities:
>> - for (i = 0; i < num_entities; ++i)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> +error_free_entity:
>> + kfree(entity);
>> -error_cleanup_memory:
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> + return r;
>> +}
>> - kfree(entity->fences);
>> - entity->fences = NULL;
>> - }
>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> + enum drm_sched_priority priority,
>> + struct drm_file *filp,
>> + struct amdgpu_ctx *ctx)
>> +{
>> + int r;
>> - kfree(ctx->entities[0]);
>> - ctx->entities[0] = NULL;
>> - return r;
>> + r = amdgpu_ctx_priority_permit(filp, priority);
>> + if (r)
>> + return r;
>> +
>> + memset(ctx, 0, sizeof(*ctx));
>> +
>> + ctx->adev = adev;
>> +
>> + kref_init(&ctx->refcount);
>> + spin_lock_init(&ctx->ring_lock);
>> + mutex_init(&ctx->lock);
>> +
>> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> + ctx->reset_counter_query = ctx->reset_counter;
>> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> + ctx->init_priority = priority;
>> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> +
>> + return 0;
>> +
>> +}
>> +
>> +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
>> +{
>> +
>> + int i;
>> +
>> + if (!entity)
>> + return;
>> +
>> + for (i = 0; i < amdgpu_sched_jobs; ++i)
>> + dma_fence_put(entity->fences[i]);
>> +
>> + kfree(entity);
>> }
>> static void amdgpu_ctx_fini(struct kref *ref)
>> {
>> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>> refcount);
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_device *adev = ctx->adev;
>> unsigned i, j;
>> if (!adev)
>> return;
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> - for (j = 0; j < amdgpu_sched_jobs; ++j)
>> - dma_fence_put(entity->fences[j]);
>> -
>> - kfree(entity->fences);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
>> + amdgpu_ctx_fini_entity(ctx->entities[i][j]);
>> + ctx->entities[i][j] = NULL;
>> + }
>> }
>> - kfree(ctx->entities[0]);
>> mutex_destroy(&ctx->lock);
>> -
>> kfree(ctx);
>> }
>> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32
>> instance,
>> u32 ring, struct drm_sched_entity **entity)
>> {
>> + int r;
>> +
>> if (hw_ip >= AMDGPU_HW_IP_NUM) {
>> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>> return -EINVAL;
>> @@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx
>> *ctx, u32 hw_ip, u32 instance,
>> return -EINVAL;
>> }
>> - *entity = &ctx->entities[hw_ip][ring].entity;
>> + if (ctx->entities[hw_ip][ring] == NULL) {
>> + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
>> + if (r)
>> + return r;
>> + }
>> +
>> + *entity = &ctx->entities[hw_ip][ring]->entity;
>> return 0;
>> }
>> @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct
>> amdgpu_device *adev,
>> static void amdgpu_ctx_do_release(struct kref *ref)
>> {
>> struct amdgpu_ctx *ctx;
>> - unsigned num_entities;
>> - u32 i;
>> + u32 i, j;
>> ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + if (!ctx->entities[i][j])
>> + continue;
>> - num_entities = amdgpu_ctx_total_num_entities();
>> - for (i = 0; i < num_entities; i++)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
>> + }
>> + }
>> amdgpu_ctx_fini(ref);
>> }
>> @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct
>> amdgpu_ctx *ctx,
>> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>> enum drm_sched_priority priority)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> enum drm_sched_priority ctx_prio;
>> - unsigned i;
>> + unsigned i, j;
>> ctx->override_priority = priority;
>> ctx_prio = (ctx->override_priority ==
>> DRM_SCHED_PRIORITY_UNSET) ?
>> ctx->init_priority : ctx->override_priority;
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> - for (i = 0; i < num_entities; i++) {
>> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
>> + if (!ctx->entities[i][j])
>> + continue;
>> - drm_sched_entity_set_priority(entity, ctx_prio);
>> + entity = &ctx->entities[i][j]->entity;
>> + drm_sched_entity_set_priority(entity, ctx_prio);
>> + }
>> }
>> }
>> @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct
>> amdgpu_ctx_mgr *mgr)
>> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long
>> timeout)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_ctx *ctx;
>> struct idr *idp;
>> - uint32_t id, i;
>> + uint32_t id, i, j;
>> idp = &mgr->ctx_handles;
>> mutex_lock(&mgr->lock);
>> idr_for_each_entry(idp, ctx, id) {
>> - for (i = 0; i < num_entities; i++) {
>> - struct drm_sched_entity *entity;
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> +
>> + if (!ctx->entities[i][j])
>> + continue;
>> - entity = &ctx->entities[0][i].entity;
>> - timeout = drm_sched_entity_flush(entity, timeout);
>> + entity = &ctx->entities[i][j]->entity;
>> + timeout = drm_sched_entity_flush(entity, timeout);
>> + }
>> }
>> }
>> mutex_unlock(&mgr->lock);
>> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct
>> amdgpu_ctx_mgr *mgr, long timeout)
>> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_ctx *ctx;
>> struct idr *idp;
>> - uint32_t id, i;
>> + uint32_t id, i, j;
>> idp = &mgr->ctx_handles;
>> @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct
>> amdgpu_ctx_mgr *mgr)
>> continue;
>> }
>> - for (i = 0; i < num_entities; i++)
>> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> +
>> + if (!ctx->entities[i][j])
>> + continue;
>> +
>> + entity = &ctx->entities[i][j]->entity;
>> + drm_sched_entity_fini(entity);
>> + }
>> + }
>> }
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index a6cd9d4b078c..de490f183af2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -29,10 +29,12 @@ struct drm_device;
>> struct drm_file;
>> struct amdgpu_fpriv;
>> +#define AMDGPU_MAX_ENTITY_NUM 4
>> +
>> struct amdgpu_ctx_entity {
>> uint64_t sequence;
>> - struct dma_fence **fences;
>> struct drm_sched_entity entity;
>> + struct dma_fence *fences[];
>> };
>> struct amdgpu_ctx {
>> @@ -42,7 +44,7 @@ struct amdgpu_ctx {
>> unsigned reset_counter_query;
>> uint32_t vram_lost_counter;
>> spinlock_t ring_lock;
>> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
>> + struct amdgpu_ctx_entity
>> *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
>> bool preamble_presented;
>> enum drm_sched_priority init_priority;
>> enum drm_sched_priority override_priority;
More information about the amd-gfx
mailing list