[RFC PATCH] drm/amdgpu: allocate entities on demand
Nirmoy
nirmodas at amd.com
Tue Nov 26 14:41:59 UTC 2019
Thanks Christian for reviewing it. I will try to cleanup
drm_sched_entity_set_priority and come up with another patch.
On 11/26/19 10:45 AM, Christian König wrote:
> It looks like a start, but there are numerous things which need to be fixed.
>
> Question number one is: What's that good for? Entities are not the
> problem here. The real issue is the fence ring and the rq_list.
>
> The rq_list could actually be made constant since it should never be
> changed by the entity. It is only changed for backward compatibility
> in drm_sched_entity_set_priority().
>
> So I would start there and cleanup the drm_sched_entity_set_priority()
> to actually just set a new constant rq list instead.
>
> Then we could embed the fences in amdgpu_ctx_entity as dynamic array
> at the end of the structure.
>
> And last we can start to dynamic allocate and initialize the
> amdgpu_ctx_entity() structures.
>
> Regards,
> Christian.
>
> Am 26.11.19 um 00:31 schrieb Nirmoy:
>> Ran amdgpu_test(drm) successfully multiple times to test this. But I
>> am pretty sure I am missing some corner case here.
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>> On 11/26/19 12:17 AM, Nirmoy Das wrote:
>>> Currently we pre-allocate entities for all the HW IPs on
>>> context creation, and some of which might never be used.
>>>
>>> This patch tries to resolve entity wastage by creating entities
>>> for a HW IP only when it is required.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142
>>> ++++++++++++++----------
>>> 1 file changed, 81 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index a0d3d7b756eb..0a390ebe873f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>> struct amdgpu_ctx *ctx)
>>> {
>>> unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> - unsigned i, j, k;
>>> + unsigned i;
>>> int r;
>>> if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>> for (i = 0; i < num_entities; ++i) {
>>> struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>> - entity->sequence = 1;
>>> + entity->sequence = -1;
>>> entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>> }
>>> for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>>> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct
>>> amdgpu_device *adev,
>>> ctx->init_priority = priority;
>>> ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>> - struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> - struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> - unsigned num_rings = 0;
>>> - unsigned num_rqs = 0;
>>> + return 0;
>>> +
>>> +error_free_fences:
>>> + kfree(ctx->fences);
>>> + ctx->fences = NULL;
>>> + return r;
>>> +}
>>> +
>>> +static void amdgpu_ctx_fini(struct kref *ref)
>>> +{
>>> + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>>> refcount);
>>> + unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> + struct amdgpu_device *adev = ctx->adev;
>>> + unsigned i, j;
>>> +
>>> + if (!adev)
>>> + return;
>>> +
>>> + for (i = 0; i < num_entities; ++i)
>>> + for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> + dma_fence_put(ctx->entities[0][i].fences[j]);
>>> + kfree(ctx->fences);
>>> + kfree(ctx->entities[0]);
>>> +
>>> + mutex_destroy(&ctx->lock);
>>> +
>>> + kfree(ctx);
>>> +}
>>> - switch (i) {
>>> +
>>> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
>> This should be a static function which I forgot to change
>>> +{
>>> + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> + struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> + struct amdgpu_device *adev = ctx->adev;
>>> + unsigned num_rings = 0;
>>> + unsigned num_rqs = 0;
>>> + unsigned i, j;
>>> + int r = 0;
>>> +
>>> + switch (hw_ip) {
>>> case AMDGPU_HW_IP_GFX:
>>> rings[0] = &adev->gfx.gfx_ring[0];
>>> num_rings = 1;
>>> break;
>>> case AMDGPU_HW_IP_COMPUTE:
>>> - for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>>> - rings[j] = &adev->gfx.compute_ring[j];
>>> + for (i = 0; i < adev->gfx.num_compute_rings; ++i)
>>> + rings[i] = &adev->gfx.compute_ring[i];
>>> num_rings = adev->gfx.num_compute_rings;
>>> break;
>>> case AMDGPU_HW_IP_DMA:
>>> - for (j = 0; j < adev->sdma.num_instances; ++j)
>>> - rings[j] = &adev->sdma.instance[j].ring;
>>> + for (i = 0; i < adev->sdma.num_instances; ++i)
>>> + rings[i] = &adev->sdma.instance[i].ring;
>>> num_rings = adev->sdma.num_instances;
>>> break;
>>> case AMDGPU_HW_IP_UVD:
>>> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct
>>> amdgpu_device *adev,
>>> num_rings = 1;
>>> break;
>>> case AMDGPU_HW_IP_VCN_DEC:
>>> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
>>> + rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>>> }
>>> break;
>>> case AMDGPU_HW_IP_VCN_ENC:
>>> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - for (k = 0; k < adev->vcn.num_enc_rings; ++k)
>>> - rings[num_rings++] =
>>> &adev->vcn.inst[j].ring_enc[k];
>>> + for (j = 0; j < adev->vcn.num_enc_rings; ++j)
>>> + rings[num_rings++] =
>>> &adev->vcn.inst[i].ring_enc[j];
>>> }
>>> break;
>>> case AMDGPU_HW_IP_VCN_JPEG:
>>> - for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
>>> - if (adev->vcn.harvest_config & (1 << j))
>>> + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
>>> + if (adev->vcn.harvest_config & (1 << i))
>>> continue;
>>> - rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
>>> + rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>>> }
>>> break;
>>> - }
>>> -
>>> - for (j = 0; j < num_rings; ++j) {
>>> - if (!rings[j]->adev)
>>> - continue;
>>> + }
>>> - rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
>>> - }
>>> + for (i = 0; i < num_rings; ++i) {
>>> + if (!rings[i]->adev)
>>> + continue;
>>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> - rqs, num_rqs, &ctx->guilty);
>>> - if (r)
>>> - goto error_cleanup_entities;
>>> + rqs[num_rqs++] =
>>> &rings[i]->sched.sched_rq[ctx->init_priority];
>>> }
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
>>> + rqs, num_rqs, &ctx->guilty);
>>> + if (r)
>>> + goto error_cleanup_entities;
>>> +
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + ctx->entities[hw_ip][i].sequence = 1;
>>> +
>>> return 0;
>>> error_cleanup_entities:
>>> - for (i = 0; i < num_entities; ++i)
>>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> - kfree(ctx->entities[0]);
>>> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>>> -error_free_fences:
>>> - kfree(ctx->fences);
>>> - ctx->fences = NULL;
>>> return r;
>>> }
>>> -static void amdgpu_ctx_fini(struct kref *ref)
>>> -{
>>> - struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>>> refcount);
>>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> - struct amdgpu_device *adev = ctx->adev;
>>> - unsigned i, j;
>>> -
>>> - if (!adev)
>>> - return;
>>> -
>>> - for (i = 0; i < num_entities; ++i)
>>> - for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> - dma_fence_put(ctx->entities[0][i].fences[j]);
>>> - kfree(ctx->fences);
>>> - kfree(ctx->entities[0]);
>>> -
>>> - mutex_destroy(&ctx->lock);
>>> -
>>> - kfree(ctx);
>>> -}
>>> -
>>> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32
>>> instance,
>>> u32 ring, struct drm_sched_entity **entity)
>>> {
>>> + int r;
>>> +
>>> if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>> return -EINVAL;
>>> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx
>>> *ctx, u32 hw_ip, u32 instance,
>>> return -EINVAL;
>>> }
>>> + if (ctx->entities[hw_ip][ring].sequence == -1) {
>>> + r = amdgpu_ctx_init_entity(ctx, hw_ip);
>>> +
>>> + if (r)
>>> + return r;
>>> + }
>>> +
>>> *entity = &ctx->entities[hw_ip][ring].entity;
>>> return 0;
>>> }
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
More information about the amd-gfx
mailing list